library(ggplot2)
library(vegan)
library(colorRamps)
library(psych)
library(tidyr)
library(readxl)
library(ape)
library(ggplot2)
library(colorRamps)
library(ggrepel)
rm(list = ls())
load("data/CForBio.data.prep.2022.11.22.RData")
library(vegan)
cutoff <- 1000
sort(colSums(bac.amp1))
## BD1623 DL1904 NG14 XS0601 BTM1515 GT0910 NG7 BD1512 TT1419 CB2424
## 24903 26972 27053 27234 29143 31098 33452 34090 34339 34770
## GT2515 TT0302 DHS0824 HS1433 HS1531 TT1201 HS2325 BTM2317 GT0206 DHS0911
## 34826 35210 35306 35369 35611 35751 36395 36829 36923 37867
## LS11 DHS0808 BTM1323 NG8 CB2009 XS0809 DL0810 LS9 DL0906 XS2022
## 39954 41573 42171 42958 43395 45875 45977 46174 46676 47364
## GH1 GH4 BD1203 LS15 CB0122 GH2
## 48535 54597 55685 60392 65051 65829
sort(colSums(bac.mgm1))
## HS1433 BD1623 NG7 XS2022 DHS0824 DHS0808 DL1904 BTM1323
## 9115574 9597402 9667544 9737388 9875341 10013817 10016208 10110020
## TT1419 HS2325 GT0910 DL0810 GT2515 GT0206 TT0302 NG8
## 10160188 10164701 10195067 10301253 10395324 10534807 10643744 10648853
## XS0809 HS1531 LS11 CB2424 BTM2317 XS0601 GH1 DL0906
## 10675545 10684100 10970718 11115704 11143274 11320447 11461977 11544870
## BD1512 GH2 NG14 DHS0911 CB0122 CB2009 BD1203 TT1201
## 11738899 11750163 11847992 11987853 12237915 12559810 12632900 12911304
## LS9 LS15 GH4 BTM1515
## 13903753 14521553 14922282 15939544
sort(colSums(ko.mgm1))
## HS1433 HS1531 BD1512 HS2325 DHS0911 BD1203 CB0122 XS2022
## 516612.0 554923.2 561838.9 564412.5 570647.8 572166.7 578939.1 581887.7
## BD1623 TT1419 GH4 XS0809 CB2009 TT0302 DHS0824 GT0206
## 585609.5 588950.4 590928.8 591823.2 595951.2 597472.6 597523.9 605316.2
## GT0910 DHS0808 XS0601 TT1201 LS9 LS15 NG8 GT2515
## 607017.0 611526.6 612949.0 618809.8 618974.8 621618.7 623901.4 624726.0
## NG14 NG7 BTM1323 BTM1515 LS11 BTM2317 CB2424 GH2
## 626956.5 630290.3 640564.2 640670.3 653233.2 657988.6 661383.3 664885.4
## GH1 DL0906 DL0810 DL1904
## 670864.8 682216.2 709302.7 712869.3
sort(colSums(cog.mgm1))
## HS1433 BD1512 CB0122 DHS0911 HS1531 BD1623 HS2325 GH4
## 665137.6 700954.6 701771.9 706113.6 707706.2 717680.3 719426.8 719907.9
## BD1203 TT1419 XS2022 CB2009 XS0809 NG8 TT0302 LS9
## 720810.2 735415.6 736115.9 736728.3 737294.4 738141.8 741127.0 741129.8
## DHS0824 NG14 GT0206 NG7 LS15 GT0910 DHS0808 XS0601
## 742211.5 745558.0 745871.5 746178.6 747159.8 748678.5 749599.8 753834.3
## BTM2317 BTM1323 BTM1515 TT1201 LS11 GT2515 CB2424 DL0906
## 761668.5 762288.9 766129.1 769869.2 772483.2 780307.3 785374.0 785610.6
## GH2 GH1 DL1904 DL0810
## 806012.5 810165.7 822934.2 825360.4
sort(colSums(rgi.mgm1))
## GH4 CB0122 CB2009 NG8 BD1623 NG7 NG14 DL0906
## 20995.21 23022.58 24293.34 24840.47 25081.28 25099.30 25294.91 25324.26
## BD1512 LS15 HS1433 LS9 TT1419 LS11 BD1203 BTM2317
## 25426.07 25842.94 25856.23 25965.01 26532.75 26701.09 26808.88 27092.64
## DHS0911 TT0302 XS0601 XS0809 CB2424 XS2022 GT0206 BTM1515
## 27105.60 27138.77 27223.78 27593.08 27629.52 27753.26 27775.72 27778.20
## GT0910 TT1201 GH2 DL0810 DL1904 HS2325 DHS0824 DHS0808
## 28043.34 28054.02 28314.02 28403.45 28474.85 28496.89 28701.60 28718.88
## GH1 HS1531 BTM1323 GT2515
## 28730.37 29012.93 29564.26 30208.16
sort(colSums(cazy.mgm1))
## BD1623 GH4 BD1512 NG8 NG7 NG14 CB0122 XS0601
## 13535.55 13791.28 14072.59 14476.19 14543.41 14716.72 14718.56 14858.62
## CB2009 BD1203 GT0910 GT0206 DL0906 CB2424 LS9 LS15
## 15158.24 15372.90 15421.68 15423.15 15490.17 15512.52 15621.44 15661.32
## LS11 BTM2317 XS0809 HS1433 HS2325 XS2022 DL1904 DL0810
## 15742.96 15933.04 16022.71 16071.16 16630.05 16715.92 16996.28 17006.50
## GH2 GT2515 DHS0911 GH1 HS1531 BTM1515 TT1419 DHS0824
## 17012.91 17096.55 17106.44 17144.00 17227.44 17361.87 17600.44 17729.42
## BTM1323 TT0302 TT1201 DHS0808
## 17929.15 18253.39 18461.59 18792.74
bac.amp <- rrarefy(t(bac.amp1),sample = 24903 )
bac.mgm <- data.frame(t(bac.mgm1))
ko.mgm <- data.frame(t(ko.mgm1))
cog.mgm <- data.frame(t(cog.mgm1))
rgi.mgm <- data.frame(t(rgi.mgm1))
cazy.mgm <- data.frame(t(cazy.mgm1))
env.amp1$rgi.mgm.H <- vegan::diversity(rgi.mgm)
env.amp1$cazy.mgm.H <- vegan::diversity(cazy.mgm)
env.amp1$ko.mgm.H <- vegan::diversity(ko.mgm)
env.amp1$cog.mgm.H <- vegan::diversity(cog.mgm)
env.amp1$bac.mgm.H <- vegan::diversity(bac.mgm)
env.amp1$bac.amp.H <- vegan::diversity(bac.amp)
env.amp1$rgi.mgm.S <- vegan::specnumber(rgi.mgm)
env.amp1$cazy.mgm.S <- vegan::specnumber(cazy.mgm)
env.amp1$ko.mgm.S <- vegan::specnumber(ko.mgm)
env.amp1$cog.mgm.S <- vegan::specnumber(cog.mgm)
env.amp1$bac.mgm.S <- vegan::specnumber(bac.mgm)
env.amp1$bac.amp.S <- vegan::specnumber(bac.amp)
env.amp1$cog.mgm.sum <- rowSums(cog.mgm)
env.amp1$rgi.mgm.sum <- rowSums(rgi.mgm)
env.amp1$cazy.mgm.sum <- rowSums(cazy.mgm)
env.amp1$ko.mgm.sum <- rowSums(ko.mgm)
env.amp1$bac.mgm.sum <- rowSums(bac.mgm)
#genomeTraits
genomeTrait<-read_excel("data/genomeTrait/genomesize.contigsprok/genomesize_novirus/all_genomesize.novirus.xlsx",sheet = "Sheet2")
genomeTrait$sample_name == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.genomeTraits<-cbind(env.amp1,genomeTrait[,c(2:8)])
#Supplementary Fig. 1
#ordination
library(ape)
library(ggplot2)
library(colorRamps)
#bac.amplicon
#calculate distance matrix
set.seed(315)
bac.amp.dist<-vegdist(bac.amp,method = 'bray')
#cmdscale function
bac.amp.pc<-cmdscale(bac.amp.dist,k=(nrow(bac.amp)-1),eig = TRUE)
eig<-bac.amp.pc$eig
bac.amp.pc1<-(eig[1]*100/sum(eig))
bac.amp.pc2<-(eig[2]*100/sum(eig))
#subset environmental data table
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")];
colnames(env.sub) <- c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
#envfit analysis
ef<-envfit(bac.amp.pc,env.sub,permutations=999,na.rm=TRUE)
ef
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## Latitude 0.64434 -0.76474 0.7235 0.001 ***
## Longitude 0.49235 -0.87040 0.2699 0.004 **
## Altitude 0.64390 -0.76511 0.2947 0.006 **
## MAP -0.92840 0.37159 0.7042 0.001 ***
## MAT -0.63470 0.77276 0.6747 0.001 ***
## TC 0.43316 -0.90132 0.3550 0.003 **
## TN 0.89899 -0.43797 0.2195 0.027 *
## TP 0.99948 -0.03216 0.5093 0.001 ***
## pH 0.65402 0.75648 0.7467 0.001 ***
## ACa 0.88251 0.47029 0.6914 0.001 ***
## AMg 0.64494 0.76423 0.6420 0.001 ***
## AFe 0.34361 0.93911 0.3927 0.001 ***
## AK -0.32032 -0.94731 0.1072 0.150
## C_N -0.03667 -0.99933 0.4605 0.001 ***
## C_P -0.91840 -0.39565 0.5110 0.001 ***
## N_P -0.99449 -0.10488 0.5432 0.001 ***
## Soil bulk density -0.57728 -0.81655 0.2642 0.006 **
## Plant abundance -0.91311 0.40772 0.0973 0.181
## Plant basal area -0.09837 -0.99515 0.1916 0.035 *
## Plant richness -0.85613 0.51676 0.5796 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#extract scores and add color var
pcoa.scores.bact.amp<-data.frame(bac.amp.pc$points)[1:35]
pcoa.scores.bact.amp$Site.latitude<-env.amp1$site.latitude
m1<-max(abs(pcoa.scores.bact.amp$X1)); m2<-max(abs(pcoa.scores.bact.amp$X2))
#extract envfit result
en_coord_cont.bact.amp<-as.data.frame(scores(ef,"vectors"))*ordiArrowMul(ef)
en_coord_cont.bact.amp
## Dim1 Dim2
## Latitude 0.56013924 -0.66481177
## Longitude 0.26144377 -0.46219132
## Altitude 0.35722591 -0.42447430
## MAP -0.79626246 0.31870204
## MAT -0.53284655 0.64875262
## TC 0.26377208 -0.54885284
## TN 0.43044578 -0.20970404
## TP 0.72899299 -0.02346002
## pH 0.57761811 0.66810497
## ACa 0.75000000 0.39967560
## AMg 0.52813385 0.62582498
## AFe 0.22006155 0.60145287
## AK -0.10720886 -0.31706334
## C_N -0.02543437 -0.69306637
## C_P -0.67095480 -0.28904619
## N_P -0.74912718 -0.07900155
## Soil bulk density -0.30328553 -0.42898694
## Plant abundance -0.29113451 0.12999647
## Plant basal area -0.04401280 -0.44525338
## Plant richness -0.66616579 0.40209693
en_coord_cont.bact.amp$envfactor<-rownames(en_coord_cont.bact.amp)
p.bact = ggplot(data = pcoa.scores.bact.amp, aes(x = X1, y = X2)) +
geom_point(size=5,mapping = aes(color=Site.latitude))+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
geom_segment(aes(x = 0, y = 0, xend = Dim1*m1, yend = Dim2*m1),arrow = arrow(length = unit(0.02, "npc")),
data = en_coord_cont.bact.amp, size =0.6, colour = "grey")+
geom_text_repel(data = en_coord_cont.bact.amp, aes(x=Dim1*m1,y=Dim2*m1,label = envfactor),
colour="black",size=3)+
ggtitle("Bacteria.16S.Amplicon")+
labs(x = sprintf("PCo1 (%.1f%%)", bac.amp.pc1), y = sprintf("PCo2 (%.1f%%)", bac.amp.pc2))+
theme_bw()+theme(axis.text.x = element_text(size = 12,color = "black"),axis.title.x = element_text(size = 12,color = "black"),
axis.text.y = element_text(size = 12,color = "black",),axis.title.y = element_text(size = 12,color = "black"),
legend.title = element_text(size=12))
p.bact

#ggsave("pdf2/Fig.S5a_2.PcoA.bact.composition.2023.08.24.pdf", width = 6, height = 4)
#bac.mgm
set.seed(315)
bac.mgm.dist<-vegdist(bac.mgm,method = 'bray')
#cmdscale function
bac.mgm.pc<-cmdscale(bac.mgm.dist,k=(nrow(bac.mgm)-1),eig = TRUE)
eig<-bac.mgm.pc$eig
bac.mgm.pc1<-(eig[1]*100/sum(eig))
bac.mgm.pc2<-(eig[2]*100/sum(eig))
#subset environmental data table
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")];
colnames(env.sub) <- c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
#envfit analysis
ef<-envfit(bac.mgm.pc,env.sub,permutations=999,na.rm=TRUE)
ef
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## Latitude 0.42080 0.90715 0.5000 0.001 ***
## Longitude 0.31234 0.94997 0.1370 0.079 .
## Altitude 0.23115 0.97292 0.1535 0.067 .
## MAP -0.67158 -0.74093 0.6670 0.001 ***
## MAT -0.40495 -0.91434 0.4457 0.002 **
## TC 0.30022 0.95387 0.1873 0.029 *
## TN 0.78687 0.61712 0.2139 0.024 *
## TP 0.87349 0.48684 0.6527 0.001 ***
## pH 0.97217 -0.23426 0.8676 0.001 ***
## ACa 0.99977 0.02166 0.8189 0.001 ***
## AMg 0.97346 -0.22886 0.7548 0.001 ***
## AFe 0.85316 -0.52164 0.4543 0.001 ***
## AK -0.95701 0.29007 0.1260 0.116
## C_N -0.39574 0.91836 0.2544 0.008 **
## C_P -0.91325 -0.40740 0.5970 0.001 ***
## N_P -0.83879 -0.54446 0.6023 0.001 ***
## Soil bulk density -0.98603 -0.16654 0.2178 0.021 *
## Plant abundance -0.48348 -0.87535 0.1636 0.048 *
## Plant basal area -0.34464 0.93873 0.3233 0.004 **
## Plant richness -0.66758 -0.74453 0.4741 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#extract scores and add color var
pcoa.scores.bact.mgm<-data.frame(bac.mgm.pc$points)[1:26]
pcoa.scores.bact.mgm$Site.latitude<-env.amp1$site.latitude
m1<-max(abs(pcoa.scores.bact.mgm$X1)); m2<-max(abs(pcoa.scores.bact.mgm$X2))
#extract envfit result
en_coord_cont.bact.mgm<-as.data.frame(scores(ef,"vectors"))*ordiArrowMul(ef)
en_coord_cont.bact.mgm
## Dim1 Dim2
## Latitude 0.24645096 0.53129711
## Longitude 0.09576549 0.29126990
## Altitude 0.07499738 0.31567302
## MAP -0.45428401 -0.50119297
## MAT -0.22390720 -0.50555672
## TC 0.10762124 0.34194357
## TN 0.30138685 0.23637169
## TP 0.58448378 0.32576319
## pH 0.75000000 -0.18072762
## ACa 0.74935727 0.01623147
## AMg 0.70047163 -0.16467795
## AFe 0.47629421 -0.29121659
## AK -0.28135312 0.08527704
## C_N -0.16533269 0.38367230
## C_P -0.58443859 -0.26072127
## N_P -0.53915354 -0.34996348
## Soil bulk density -0.38115403 -0.06437582
## Plant abundance -0.16194919 -0.29321340
## Plant basal area -0.16231546 0.44211469
## Plant richness -0.38071526 -0.42459937
en_coord_cont.bact.mgm$envfactor<-rownames(en_coord_cont.bact.mgm)
p.bact.mgm = ggplot(data = pcoa.scores.bact.mgm, aes(x = X1, y = X2)) +
geom_point(size=5,mapping = aes(color=Site.latitude))+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
geom_segment(aes(x = 0, y = 0, xend = Dim1*m1, yend = Dim2*m2),arrow = arrow(length = unit(0.02, "npc")),
data = en_coord_cont.bact.mgm, size =0.6, colour = "grey")+
geom_text_repel(data = en_coord_cont.bact.mgm, aes(x=Dim1*m1,y=Dim2*m2,label = envfactor),
colour="black",size=3)+
ggtitle("Bacteria.Metagenome.Shotgun")+
labs(x = sprintf("PCo1 (%.1f%%)", bac.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", bac.mgm.pc2))+
theme_bw()+theme(axis.text.x = element_text(size = 12,color = "black"),axis.title.x = element_text(size = 12,color = "black"),
axis.text.y = element_text(size = 12,color = "black",),axis.title.y = element_text(size = 12,color = "black"),
legend.title = element_text(size=12))
p.bact.mgm

#ggsave("pdf2/Fig.S5b.PcoA.bact.composition.2023.08.24.pdf", width = 6, height = 4)
#ko.mgm
set.seed(315)
ko.mgm.dist<-vegdist(ko.mgm,method = 'bray')
#cmdscale function
ko.mgm.pc<-cmdscale(ko.mgm.dist,k=(nrow(ko.mgm)-1),eig = TRUE)
eig<-ko.mgm.pc$eig
ko.mgm.pc1<-(eig[1]*100/sum(eig))
ko.mgm.pc2<-(eig[2]*100/sum(eig))
#subset environmental data table
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")];
colnames(env.sub) <- c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
#envfit analysis
ef<-envfit(ko.mgm.pc,env.sub,permutations=999,na.rm=TRUE)
ef
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## Latitude 0.58576 0.81049 0.3834 0.001 ***
## Longitude 0.33285 0.94298 0.1808 0.038 *
## Altitude 0.44348 0.89628 0.0827 0.222
## MAP -0.93189 -0.36274 0.5043 0.001 ***
## MAT -0.57871 -0.81554 0.3213 0.004 **
## TC 0.54793 0.83652 0.1222 0.104
## TN 0.87435 -0.48529 0.1904 0.028 *
## TP 0.89319 -0.44967 0.5693 0.001 ***
## pH 0.87604 -0.48223 0.8325 0.001 ***
## ACa 0.99333 0.11529 0.8480 0.001 ***
## AMg 0.90581 -0.42369 0.7441 0.001 ***
## AFe 0.45755 -0.88919 0.6142 0.001 ***
## AK -0.20527 0.97871 0.3309 0.003 **
## C_N -0.15276 0.98826 0.4927 0.001 ***
## C_P -0.86484 0.50206 0.6099 0.001 ***
## N_P -0.98352 0.18080 0.5977 0.001 ***
## Soil bulk density -0.50795 0.86139 0.2628 0.007 **
## Plant abundance -0.72252 0.69135 0.0527 0.399
## Plant basal area -0.84294 -0.53801 0.2144 0.020 *
## Plant richness -0.71641 -0.69768 0.4920 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#extract scores and add color var
pcoa.scores.ko.mgm<-data.frame(ko.mgm.pc$points)[1:35]
pcoa.scores.ko.mgm$Site.latitude<-env.amp1$site.latitude
m1<-max(abs(pcoa.scores.ko.mgm$X1)); m2<-max(abs(pcoa.scores.ko.mgm$X2))
#extract envfit result
en_coord_cont.ko.mgm<-as.data.frame(scores(ef,"vectors"))*ordiArrowMul(ef)
en_coord_cont.ko.mgm
## Dim1 Dim2
## Latitude 0.29738774 0.4114841
## Longitude 0.11602764 0.3287106
## Altitude 0.10453889 0.2112753
## MAP -0.54258098 -0.2112004
## MAT -0.26895054 -0.3790165
## TC 0.15702258 0.2397254
## TN 0.31280018 -0.1736139
## TP 0.55256466 -0.2781863
## pH 0.65535896 -0.3607530
## ACa 0.75000000 0.0870472
## AMg 0.64067157 -0.2996765
## AFe 0.29401317 -0.5713778
## AK -0.09680930 0.4615829
## C_N -0.08791761 0.5687589
## C_P -0.55378353 0.3214828
## N_P -0.62346477 0.1146108
## Soil bulk density -0.21349078 0.3620435
## Plant abundance -0.13594752 0.1300820
## Plant basal area -0.31998559 -0.2042320
## Plant richness -0.41199512 -0.4012266
en_coord_cont.ko.mgm$envfactor<-rownames(en_coord_cont.ko.mgm)
p.ko.mgm = ggplot(data = pcoa.scores.ko.mgm, aes(x = X1, y = X2)) +
geom_point(size=5,mapping = aes(color=Site.latitude))+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
geom_segment(aes(x = 0, y = 0, xend = Dim1*m1, yend = Dim2*m2),arrow = arrow(length = unit(0.02, "npc")),
data = en_coord_cont.ko.mgm, size =0.6, colour = "grey")+
geom_text_repel(data = en_coord_cont.ko.mgm, aes(x=Dim1*m1,y=Dim2*m2,label = envfactor),
colour="black",size=3)+
ggtitle("KO.Metagenome")+
labs(x = sprintf("PCo1 (%.1f%%)", ko.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", ko.mgm.pc2))+
theme_bw()+theme(axis.text.x = element_text(size = 12,color = "black"),axis.title.x = element_text(size = 12,color = "black"),
axis.text.y = element_text(size = 12,color = "black",),axis.title.y = element_text(size = 12,color = "black"),
legend.title = element_text(size=12))
p.ko.mgm

#ggsave("pdf2/Fig.S5c.PcoA.KO.composition.2023.08.24.pdf", width = 6, height = 4)
#rgi.mgm
set.seed(315)
rgi.mgm.dist<-vegdist(rgi.mgm,method = 'bray')
#cmdscale function
rgi.mgm.pc<-cmdscale(rgi.mgm.dist,k=(nrow(rgi.mgm)-1),eig = TRUE)
eig<-rgi.mgm.pc$eig
rgi.mgm.pc1<-(eig[1]*100/sum(eig))
rgi.mgm.pc2<-(eig[2]*100/sum(eig))
#subset environmental data table
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")];
colnames(env.sub) <- c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
#envfit analysis
ef<-envfit(rgi.mgm.pc,env.sub,permutations=999,na.rm=TRUE)
ef
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## Latitude 0.48603 -0.87394 0.4106 0.001 ***
## Longitude 0.69607 -0.71798 0.0854 0.219
## Altitude 0.09595 -0.99539 0.3231 0.005 **
## MAP -0.66337 0.74829 0.6141 0.001 ***
## MAT -0.44612 0.89497 0.3772 0.002 **
## TC 0.76116 -0.64856 0.1173 0.112
## TN 0.99999 -0.00390 0.2730 0.007 **
## TP 0.94737 -0.32014 0.6591 0.001 ***
## pH 0.84127 -0.54061 0.7804 0.001 ***
## ACa 0.78041 -0.62527 0.8867 0.001 ***
## AMg 0.92829 -0.37186 0.7052 0.001 ***
## AFe 0.95377 0.30055 0.4009 0.001 ***
## AK -0.75505 -0.65567 0.1376 0.085 .
## C_N -0.49333 -0.86984 0.1969 0.032 *
## C_P -0.99442 0.10552 0.6067 0.001 ***
## N_P -0.95648 0.29179 0.5950 0.001 ***
## Soil bulk density -0.64732 0.76222 0.1931 0.023 *
## Plant abundance -0.99126 0.13192 0.0668 0.329
## Plant basal area -0.76782 -0.64067 0.1976 0.031 *
## Plant richness -0.65745 0.75350 0.5007 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#extract scores and add color var
pcoa.scores.rgi.mgm<-data.frame(rgi.mgm.pc$points)[1:31]
pcoa.scores.rgi.mgm$Site.latitude<-env.amp1$site.latitude
m1<-max(abs(pcoa.scores.rgi.mgm$X1)); m2<-max(abs(pcoa.scores.rgi.mgm$X2))
#extract envfit result
en_coord_cont.rgi.mgm<-as.data.frame(scores(ef,"vectors"))*ordiArrowMul(ef)
en_coord_cont.rgi.mgm
## Dim1 Dim2
## Latitude 0.29963680 -0.538781892
## Longitude 0.19568448 -0.201844240
## Altitude 0.05246991 -0.544347073
## MAP -0.50017239 0.564203448
## MAT -0.26360889 0.528836280
## TC 0.25080804 -0.213704571
## TN 0.50271028 -0.001960014
## TP 0.73998653 -0.250059560
## pH 0.71501770 -0.459482058
## ACa 0.70702555 -0.566471045
## AMg 0.75000000 -0.300441453
## AFe 0.58099901 0.183084892
## AK -0.26946797 -0.233998181
## C_N -0.21061936 -0.371363936
## C_P -0.74522191 0.079078145
## N_P -0.70985409 0.216555927
## Soil bulk density -0.27365158 0.322226473
## Plant abundance -0.24650471 0.032804771
## Plant basal area -0.32837179 -0.273992055
## Plant richness -0.44758618 0.512972997
en_coord_cont.rgi.mgm$envfactor<-rownames(en_coord_cont.rgi.mgm)
p.rgi.mgm = ggplot(data = pcoa.scores.rgi.mgm, aes(x = X1, y = X2)) +
geom_point(size=5,mapping = aes(color=Site.latitude))+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
geom_segment(aes(x = 0, y = 0, xend = Dim1*m1, yend = Dim2*m2),arrow = arrow(length = unit(0.02, "npc")),
data = en_coord_cont.rgi.mgm, size =0.6, colour = "grey")+
geom_text_repel(data = en_coord_cont.rgi.mgm, aes(x=Dim1*m1,y=Dim2*m2,label = envfactor),
colour="black",size=3)+
ggtitle("ARG.Metagenome")+
labs(x = sprintf("PCo1 (%.1f%%)", rgi.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", rgi.mgm.pc2))+
theme_bw()+theme(axis.text.x = element_text(size = 12,color = "black"),axis.title.x = element_text(size = 12,color = "black"),
axis.text.y = element_text(size = 12,color = "black",),axis.title.y = element_text(size = 12,color = "black"),
legend.title = element_text(size=12))
p.rgi.mgm

#ggsave("pdf2/Fig.S5d.PcoA.ARG.composition.2023.08.24.pdf", width = 6, height = 4)
#cazy.mgm
set.seed(315)
cazy.mgm.dist<-vegdist(cazy.mgm,method = 'bray')
#cmdscale function
cazy.mgm.pc<-cmdscale(cazy.mgm.dist,k=(nrow(cazy.mgm)-1),eig = TRUE)
eig<-cazy.mgm.pc$eig
cazy.mgm.pc1<-(eig[1]*100/sum(eig))
cazy.mgm.pc2<-(eig[2]*100/sum(eig))
#subset environmental data table
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")];
colnames(env.sub) <- c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
#envfit analysis
ef<-envfit(cazy.mgm.pc,env.sub,permutations=999,na.rm=TRUE)
ef
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## Latitude -0.72585 0.68786 0.3686 0.002 **
## Longitude -0.43210 0.90182 0.1540 0.055 .
## Altitude -0.99984 0.01814 0.0825 0.250
## MAP 0.61327 -0.78987 0.6373 0.001 ***
## MAT 0.82086 -0.57114 0.3090 0.008 **
## TC -0.87596 0.48238 0.1324 0.084 .
## TN -0.98160 0.19094 0.2999 0.005 **
## TP -0.96307 0.26925 0.6831 0.001 ***
## pH -0.89688 0.44227 0.7826 0.001 ***
## ACa -0.73360 0.67958 0.8802 0.001 ***
## AMg -0.88603 0.46363 0.6596 0.001 ***
## AFe -0.99401 -0.10928 0.4367 0.001 ***
## AK 0.99703 0.07696 0.1005 0.181
## C_N 0.94472 0.32788 0.0635 0.344
## C_P 0.93341 -0.35880 0.5566 0.001 ***
## N_P 0.92667 -0.37588 0.5655 0.001 ***
## Soil bulk density 0.91417 0.40533 0.2858 0.005 **
## Plant abundance 0.53150 -0.84706 0.1000 0.176
## Plant basal area 0.25323 -0.96741 0.1722 0.052 .
## Plant richness 0.62642 -0.77948 0.5625 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#extract scores and add color var
pcoa.scores.cazy.mgm<-data.frame(cazy.mgm.pc$points)[1:24]
pcoa.scores.cazy.mgm$Site.latitude<-env.amp1$site.latitude
m1<-max(abs(pcoa.scores.cazy.mgm$X1)); m2<-max(abs(pcoa.scores.cazy.mgm$X2))
#extract envfit result
en_coord_cont.cazy.mgm<-as.data.frame(scores(ef,"vectors"))*ordiArrowMul(ef)
en_coord_cont.cazy.mgm
## Dim1 Dim2
## Latitude -0.4742840 0.449458516
## Longitude -0.1825317 0.380955191
## Altitude -0.3091767 0.005610774
## MAP 0.5269460 -0.678692841
## MAT 0.4910803 -0.341684028
## TC -0.3430300 0.188900061
## TN -0.5786084 0.112548294
## TP -0.8566943 0.239512269
## pH -0.8539685 0.421105139
## ACa -0.7407636 0.686216101
## AMg -0.7744690 0.405257277
## AFe -0.7069852 -0.077727317
## AK 0.3402142 0.026261368
## C_N 0.2562756 0.088943073
## C_P 0.7495152 -0.288109202
## N_P 0.7500000 -0.304220198
## Soil bulk density 0.5259784 0.233209587
## Plant abundance 0.1808565 -0.288234538
## Plant basal area 0.1131034 -0.432089601
## Plant richness 0.5056624 -0.629219503
en_coord_cont.cazy.mgm$envfactor<-rownames(en_coord_cont.cazy.mgm)
p.cazy.mgm = ggplot(data = pcoa.scores.cazy.mgm, aes(x = X1, y = X2)) +
geom_point(size=5,mapping = aes(color=Site.latitude))+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
geom_segment(aes(x = 0, y = 0, xend = Dim1*m1, yend = Dim2*m2),arrow = arrow(length = unit(0.02, "npc")),
data = en_coord_cont.cazy.mgm, size =0.6, colour = "grey")+
geom_text_repel(data = en_coord_cont.cazy.mgm, aes(x=Dim1*m1,y=Dim2*m2,label = envfactor),
colour="black",size=3)+
ggtitle("CAZy.Metagenome")+
labs(x = sprintf("PCo1 (%.1f%%)", cazy.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", cazy.mgm.pc2))+
theme_bw()+theme(axis.text.x = element_text(size = 12,color = "black"),axis.title.x = element_text(size = 12,color = "black"),
axis.text.y = element_text(size = 12,color = "black",),axis.title.y = element_text(size = 12,color = "black"),
legend.title = element_text(size=12))
p.cazy.mgm

#ggsave("pdf2/Fig.S5e.PcoA.CAZy.composition.2023.08.24.pdf", width = 6, height = 4)
#Supplementary Fig. 2
library(readxl)
#genomesize of otu
genomesize_otu<-read_excel("data/otu_genome/calculate/genomesize_otu.xlsx")
genomesize_otu$sample_name == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
genomesize_otu<-cbind(genomesize_otu,env.amp1[,c(29,82)])
library(ggplot2)
library(colorRamps)
summary(lm(AGS_otu~pH,genomesize_otu))
##
## Call:
## lm(formula = AGS_otu ~ pH, data = genomesize_otu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30434 -0.10159 -0.00056 0.09489 0.36679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.46410 0.12243 44.632 <2e-16 ***
## pH -0.06557 0.02440 -2.688 0.0111 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1493 on 34 degrees of freedom
## Multiple R-squared: 0.1752, Adjusted R-squared: 0.151
## F-statistic: 7.223 on 1 and 34 DF, p-value: 0.01106
shapiro.test(residuals(lm(AGS_otu~pH,genomesize_otu)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(AGS_otu ~ pH, genomesize_otu))
## W = 0.98375, p-value = 0.8632
p15<-ggplot() + geom_jitter(data=genomesize_otu, height = 0, aes(x= pH, y=AGS_otu, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=genomesize_otu,aes(x= pH, y=AGS_otu), method ="lm", col= "white")+
labs(x = "pH",y = "Average Genome Size (Mbp)")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"),name="Site.latitude")+
theme(legend.title = element_text(colour="black", size=12, face="bold"),
legend.text = element_text(colour="black", size=12, face="bold"),
axis.text=element_text(size=12,face="bold"),
axis.title=element_text(size=12,face="bold"))+
annotate("text",x=6.5,y=5.5, label="R2 = 0.151\nP = 0.01106",
size=5,color="black")+ggtitle("Based on GTDB Annotation")
p15
## `geom_smooth()` using formula = 'y ~ x'

summary(lm(Protein_count~AGS_otu,genomesize_otu))
##
## Call:
## lm(formula = Protein_count ~ AGS_otu, data = genomesize_otu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.949 -13.864 3.517 12.874 55.741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 432.18 117.35 3.683 0.000795 ***
## AGS_otu 839.09 22.81 36.784 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.87 on 34 degrees of freedom
## Multiple R-squared: 0.9755, Adjusted R-squared: 0.9748
## F-statistic: 1353 on 1 and 34 DF, p-value: < 2.2e-16
shapiro.test(residuals(lm(Protein_count~AGS_otu,genomesize_otu)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(Protein_count ~ AGS_otu, genomesize_otu))
## W = 0.97453, p-value = 0.5615
p17<-ggplot() + geom_jitter(data=genomesize_otu, height = 0, aes(x= AGS_otu, y=Protein_count, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=genomesize_otu,aes(x= AGS_otu, y=Protein_count), method ="lm", col= "white")+
labs(x = "Average Genome Size (Mbp)",y = "Protein Count")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"),name="Site.latitude")+
theme(legend.title = element_text(colour="black", size=12, face="bold"),
legend.text = element_text(colour="black", size=12, face="bold"),
axis.text=element_text(size=12,face="bold"),
axis.title=element_text(size=12,face="bold"))+
annotate("text",x=5,y=5000, label="R2 = 0.975\nP < 2.2e-16",
size=5,color="black")+ggtitle("Based on GTDB Annotation")
p17
## `geom_smooth()` using formula = 'y ~ x'

#Supplementary Fig. 2
library(ggpubr)
##
## 载入程辑包:'ggpubr'
## The following object is masked from 'package:ape':
##
## rotate
ggarrange(p15,p17,ncol = 2,nrow = 1,
labels = c("a","b"),common.legend = TRUE,legend = "right")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("pdf/Fig.S3_genometraits_otu.pdf",plot = last_plot(),units = "in",height = 3.5,width = 8,dpi = 600)
#Supplementary Fig.3
data1<-data.frame(t(bac.amp))
ID<-data.frame(bac.amp.ID1)
ID$OTU.ID<-paste(ID$OTU,ID$phylum,sep = ".")
env.amp1$pH2<-paste("pH",round(env.amp1$pH,digits = 2),env.amp1$sample_name,sep = ".")
opf<-"genus"
Flev<-ID[,opf]
fung.lev<-data.frame(aggregate(data1,by=list(Flev) , sum))
rownames(fung.lev)<-fung.lev[,1]; fung.lev<-fung.lev[,-1]
data2<-data.frame(t(fung.lev))
total<-apply(data2, 1, sum);
fung.relabu<-data.frame(lapply(data2, function(x) { x / total }) ) # change the abundance data into relative abundance#
fung.L<-fung.relabu
env.L<-env.amp1
fung.L <- fung.L[,order(-colSums(fung.L))] ##
bac.genus<-cbind(env.L,fung.L)
colnames(bac.genus)
## [1] "sample_name" "otu_id"
## [3] "chenl_sample" "site"
## [5] "location" "plot_NO"
## [7] "sample_date" "site_NO"
## [9] "forest_type" "Region"
## [11] "MAT" "MAP"
## [13] "latitude" "longitude"
## [15] "altitude" "Coordinate.X"
## [17] "Coordinate.Y" "TN"
## [19] "TC" "ACa"
## [21] "AFe" "AK"
## [23] "AMg" "TP"
## [25] "C_N" "C_P"
## [27] "N_P" "soil_D"
## [29] "pH" "Soil_PC1"
## [31] "Soil_PC2" "Soil_PC3"
## [33] "Soil_PC4" "Soil_PC5"
## [35] "Soil_PC6" "plant_community_order"
## [37] "AllP_abu" "AMP_abu"
## [39] "NonAMP_abu" "AllP_rich"
## [41] "AMP_rich" "NonAMP_rich"
## [43] "AllP_genus_rich" "AMP_genus_rich"
## [45] "NonAMP_genus_rich" "AllP_family_rich"
## [47] "AMP_family_rich" "NonAMP_family_rich"
## [49] "AllP_bsa" "AMP_bsa"
## [51] "NonAMP_bsa" "AllP.PD"
## [53] "AllP.mpd" "AllP.NRI"
## [55] "AllP.mntd" "AllP.NTI"
## [57] "AMP.PD" "AMP.mpd.obs"
## [59] "AMP.NRI" "AMP.mntd.obs"
## [61] "AMP.NTI" "NonAMP.PD"
## [63] "NonAMP.mpd" "NonAMP.NRI"
## [65] "NonAMP.mntd.obs" "NonAMP.NTI"
## [67] "AllP.bi.mpd" "AllP.bi.NRI"
## [69] "AllP.bi.mntd" "AllP.bi.NTI"
## [71] "AMP.bi.mpd" "AMP.bi.NRI"
## [73] "AMP.bi.mntd" "AMP.bi.NTI"
## [75] "NonAMP.bi.mpd" "NonAMP.bi.NRI"
## [77] "NonAMP.bi.mntd" "NonAMP.bi.NTI"
## [79] "AMP.PSV" "NonAMP.PSV"
## [81] "AllP.PSV" "site.latitude"
## [83] "rgi.mgm.H" "cazy.mgm.H"
## [85] "ko.mgm.H" "cog.mgm.H"
## [87] "bac.mgm.H" "bac.amp.H"
## [89] "rgi.mgm.S" "cazy.mgm.S"
## [91] "ko.mgm.S" "cog.mgm.S"
## [93] "bac.mgm.S" "bac.amp.S"
## [95] "cog.mgm.sum" "rgi.mgm.sum"
## [97] "cazy.mgm.sum" "ko.mgm.sum"
## [99] "bac.mgm.sum" "pH2"
## [101] "X_" "Rhodoplanes"
## [103] "DA101" "Bradyrhizobium"
## [105] "Candidatus_Solibacter" "Candidatus_Xiphinematobacter"
## [107] "Candidatus_Koribacter" "Burkholderia"
## [109] "Mycobacterium" "Salinispora"
## [111] "Methylosinus" "Candidatus_Rhabdochlamydia"
## [113] "Nitrospira" "Steroidobacter"
## [115] "Pedomicrobium" "Pedosphaera"
## [117] "Planctomyces" "Kaistobacter"
## [119] "Gemmata" "Chthoniobacter"
## [121] "Methylibium" "Kitasatospora"
## [123] "Phenylobacterium" "Bacillus"
## [125] "Flavobacterium" "Pseudomo_s"
## [127] "Bdellovibrio" "A17"
## [129] "Pseudonocardia" "Opitutus"
## [131] "Mesorhizobium" "Hyphomicrobium"
## [133] "Solirubrobacter" "Ramlibacter"
## [135] "Aquicella" "Fimbriimo_s"
## [137] "Pirellula" "Pilimelia"
## [139] "Sphingomo_s" "Flavisolibacter"
## [141] "Afifella" "Rhizobium"
## [143] "Salinibacterium" "Agrobacterium"
## [145] "Aetherobacter" "Couchioplanes"
## [147] "Dokdonella" "Rubrivivax"
## [149] "Humicoccus" "Granulicella"
## [151] "A_eromyxobacter" "Geobacter"
## [153] "Conexibacter" "FFCH10602"
## [155] "Paenibacillus" "Streptomyces"
## [157] "Bosea" "Chitinophaga"
## [159] "Telmatospirillum" "Luteibacter"
## [161] "Janthinobacterium" "Devosia"
## [163] "Acidocella" "Asteroleplasma"
## [165] "Labrys" "Lysobacter"
## [167] "Dactylosporangium" "Nocardioides"
## [169] "Thermomo_s" "Collimo_s"
## [171] "Catellatospora" "Chryseobacterium"
## [173] "Mucilaginibacter" "Actinoplanes"
## [175] "Acidicapsa" "Virgisporangium"
## [177] "Kribbella" "Methylovirgula"
## [179] "Ellin506" "Candidatus_Protochlamydia"
## [181] "Sediminibacterium" "Averyella"
## [183] "Nevskia" "Pedobacter"
## [185] "Novosphingobium" "Serratia"
## [187] "OR.59" "Polaromo_s"
## [189] "JG37.AG.70" "Caulobacter"
## [191] "Microbacterium" "Streptosporangium"
## [193] "Asticcacaulis" "Niastella"
## [195] "Agromyces" "Iamia"
## [197] "Balneimo_s" "Stenotrophomo_s"
## [199] "Rubrobacter" "Silvimo_s"
## [201] "Singulisphaera" "Flavihumibacter"
## [203] "Gemmatimo_s" "Luteolibacter"
## [205] "Diplazium" "Roseateles"
## [207] "Terriglobus" "Sphingobium"
## [209] "Lotus" "Cupriavidus"
## [211] "Cellulomo_s" "Actinoallomurus"
## [213] "Nocardia" "Frankia"
## [215] "Micromonospora" "Parachlamydia"
## [217] "Comamo_s" "Cryptosporangium"
## [219] "Amycolatopsis" "Clostridium"
## [221] "Legionella" "Pseudoxanthomo_s"
## [223] "Segetibacter" "Herminiimo_s"
## [225] "Amaricoccus" "Arthrobacter"
## [227] "Methylobacterium" "Plesiocystis"
## [229] "Perlucidibaca" "Skermanella"
## [231] "Mycopla_" "Sulcia"
## [233] "Acinetobacter" "Kibdelosporangium"
## [235] "Sporocytophaga" "Solwaraspora"
## [237] "Rhodoferax" "Cytophaga"
## [239] "heteroC45_4W" "Myxococcus"
## [241] "Nitrosovibrio" "Treponema"
## [243] "Cohnella" "Fluviicola"
## [245] "Ktedonobacter" "Acidisoma"
## [247] "Modestobacter" "Spirochaeta"
## [249] "Adhaeribacter" "Dyadobacter"
## [251] "Sphingobacterium" "Cellvibrio"
## [253] "Methylocella" "Candidatus_Amoebophilus"
## [255] "Uliginosibacterium" "Alicyclobacillus"
## [257] "Rhodomicrobium" "X29.Apr"
## [259] "Rhodococcus" "Solitalea"
## [261] "Haliangium" "Loktanella"
## [263] "Ferruginibacter" "Chromobacterium"
## [265] "Methylotenera" "Planotetraspora"
## [267] "Ralstonia" "Chondromyces"
## [269] "Jiangella" "Leptothrix"
## [271] "Saccharopolyspora" "Sphingopyxis"
## [273] "Cystobacter" "Neochlamydia"
## [275] "Hydrogenophaga" "Sporichthya"
## [277] "Elbe" "Prosthecobacter"
## [279] "Dechloromo_s" "Methylocapsa"
## [281] "Phycicoccus" "Rickettsia"
## [283] "X_nnocystis" "Dok59"
## [285] "Rhodovastum" "Turneriella"
## [287] "Inquilinus" "Aeromicrobium"
## [289] "Dysgonomo_s" "Rickettsiella"
## [291] "Actinocorallia" "Pleomorphomo_s"
## [293] "Candidatus_Glomeribacter" "Cryocola"
## [295] "Rhodobacter" "Carludovica"
## [297] "Nonomuraea" "Turicibacter"
## [299] "Actinomycetospora" "Azospirillum"
## [301] "Geodermatophilus" "Kaistia"
## [303] "Methyloversatilis" "Polymorphospora"
## [305] "Promicromonospora" "Rhodopila"
## [307] "Sorangium" "Rathayibacter"
## [309] "Actinomadura" "Arthrospira"
## [311] "Haliscomenobacter" "Arenimo_s"
## [313] "Estrella" "Simkania"
## [315] "Acanthamoeba" "Angiopteris"
## [317] "Bordetella" "Demequi_"
## [319] "Chthonomo_s" "Methylomo_s"
## [321] "Pythium" "Leptonema"
## [323] "Dictyostelium" "Geothrix"
## [325] "Hymenobacter" "Parvibaculum"
## [327] "Anno_" "Brevibacillus"
## [329] "Brownia" "Coprococcus"
## [331] "Coxiella" "Paludibacter"
## [333] "Roseomo_s" "Brenneria"
## [335] "Hydrocarboniphaga" "Bryobacter"
## [337] "Congregibacter" "Emticicia"
## [339] "Lactococcus" "Roseococcus"
## [341] "Ammoniphilus" "Desulfovibrio"
## [343] "Filimo_s" "Kouleothrix"
## [345] "Nostocoida" "Solibacillus"
## [347] "Staphylococcus" "Caloramator"
## [349] "Crenothrix" "Crocinitomix"
## [351] "K82" "Kineosporia"
## [353] "Tatlockia" "Candidatus_Azobacteroides"
## [355] "Propionivibrio" "Smaragdicoccus"
## [357] "Syntrophobacter" "YNPFFP6"
## [359] "Candidatus_Methylacidiphilum" "Desulfobulbus"
## [361] "LP2A" "Methylopila"
## [363] "Rubricoccus" "Sphaerisporangium"
## [365] "Tannerella" "A_erospora"
## [367] "Anomodon" "Arabidopsis"
## [369] "Caldilinea" "Candidatus_Entotheonella"
## [371] "Candidatus_Portiera" "Chlamydia"
## [373] "Desulfosporosinus" "Elizabethkingia"
## [375] "Elusimicrobium" "Enhydrobacter"
## [377] "Erwinia" "Fritschea"
## [379] "Leadbetterella" "Leptospira"
## [381] "Paracoccus" "Phaeospirillum"
## [383] "Shimazuella" "Wolbachia"
## [385] "Xenorhabdus" "Lacibacter"
## [387] "Longispora" "Azoarcus"
## [389] "Citricoccus" "Coccomyxa"
## [391] "Edaphobacter" "Euzebya"
## [393] "Nitrosospira" "Rubritalea"
## [395] "Spirosoma" "Sporomusa"
## [397] "Williamsia" "Akkermansia"
## [399] "Aurantimo_s" "Azolla"
## [401] "Bacteroides" "Brevundimo_s"
## [403] "Candidatus_Neoehrlichia" "Chelatococcus"
## [405] "Corynebacterium" "Epulopiscium"
## [407] "Gallionella" "GOUTA19"
## [409] "Halomicronema" "Halothiobacillus"
## [411] "Leifsonia" "Luteimo_s"
## [413] "Niabella" "Oscillochloris"
## [415] "Patulibacter" "Pelosinus"
## [417] "Procabacter" "Pseuda_bae_"
## [419] "Pullulanibacillus" "Rhodocytophaga"
## [421] "Spermatozopsis" "Symbiobacterium"
## [423] "Vogesella" "Xanthobacter"
## [425] "Acetobacter" "Actinoalloteichus"
## [427] "Actinocatenispora" "Actinopolymorpha"
## [429] "Alistipes" "Andreprevotia"
## [431] "Aneurinibacillus" "Aquimari_"
## [433] "Ardenscate_" "Azospira"
## [435] "Beijerinckia" "Blautia"
## [437] "Caldicoprobacter" "Cocleimo_s"
## [439] "Coraliomargarita" "Cryptoprodotis"
## [441] "Deefgea" "Dehalobacterium"
## [443] "Desulfococcus" "Desulfomonile"
## [445] "Desulfotomaculum_Desulfovirgula" "Fibrobacteres.2"
## [447] "Finegoldia" "Flexithrix"
## [449] "Gordonia" "Lactobacillus"
## [451] "Larkinella" "Leptotrichia"
## [453] "Litorilinea" "Longilinea"
## [455] "Lutibacterium" "Macrococcus"
## [457] "Magnetospirillum" "Marinobacter"
## [459] "Microtetraspora" "Novispirillum"
## [461] "Oxalobacter" "Oxobacter"
## [463] "Phaeoceros" "Phytophthora"
## [465] "SC3.56" "Shinella"
## [467] "Sporosarci_" "Streptococcus"
## [469] "Thalassiosira" "Thauera"
## [471] "Verrucomicrobium" "Waddlia"
## [473] "Wautersiella" "Weissella"
## [475] "A_erolinea" "A_erovorax"
## [477] "Acidiphilium" "Acidithiobacillus"
## [479] "Actinomyces" "Acutodesmus"
## [481] "Aerococcus" "Allochromatium"
## [483] "Alloiococcus" "Alysiella"
## [485] "Ancylobacter" "Armatimo_s"
## [487] "Balneola" "Bartonella"
## [489] "Buchnera" "C39"
## [491] "Candidatus_Cardinium" "Candidatus_Phytoplasma"
## [493] "Chloronema" "Chroococcidiopsis"
## [495] "Cyanophora" "Deinococcus"
## [497] "Dorea" "Ehrlichia"
## [499] "Enterococcus" "Exiguobacterium"
## [501] "Flectobacillus" "Formivibrio"
## [503] "Haemophilus" "Marinobacterium"
## [505] "Moorella" "Moraxella"
## [507] "MSBL3" "Mycoplasma"
## [509] "N09" "Nitrosomo_s"
## [511] "Oryza" "Oscillospira"
## [513] "Parabacteroides" "Peptoniphilus"
## [515] "Phaselicystis" "Photorhabdus"
## [517] "Planifilum" "Pleurozia"
## [519] "Polysphondylium" "Pontibacter"
## [521] "Propionicimo_s" "Psychrobacter"
## [523] "PW3" "Saprolegnia"
## [525] "Sarci_" "Schumannella"
## [527] "Tepidimicrobium" "Thioalkalivibrio"
## [529] "Thiobacillus" "Thiorhodospira"
## [531] "Tindallia_Anoxy_tronum" "TS34"
## [533] "Vibrio" "Xylanimicrobium"
bac.genus2<-bac.genus[,c(29,82,102:111)]
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble 3.1.6 ✔ dplyr 1.0.10
## ✔ readr 2.1.3 ✔ stringr 1.4.1
## ✔ purrr 0.3.5 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%() masks ggplot2::%+%()
## ✖ psych::alpha() masks ggplot2::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
bac.genus3<-gather(bac.genus2,key = "Genus",value = "Abundance",-"pH",-"site.latitude")
bac.genus3$Genus<-factor(bac.genus3$Genus,levels = c("Rhodoplanes","DA101","Bradyrhizobium",
"Candidatus_Solibacter","Candidatus_Xiphinematobacter",
"Candidatus_Koribacter","Burkholderia","Mycobacterium",
"Salinispora","Methylosinus"))
ggplot()+geom_point(data = bac.genus3,aes(x=pH,y=Abundance*100,color=site.latitude),size=3)+
facet_wrap(.~Genus,nrow = 2,ncol = 5)+
geom_smooth(data = bac.genus3,aes(x=pH,y=Abundance*100),method = "lm")+
theme_bw()+ylab("Relative Abundance (%)")+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"),name="Site.latitude")+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"),
strip.text = element_text(size = 12))
## `geom_smooth()` using formula = 'y ~ x'

library(ggpubr)
#ggsave("otu_genome/Bacteria_top10genus2.pdf",width = 16,height = 7)
shapiro.test(residuals(lm(bac.genus2$Rhodoplanes~bac.genus2$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(bac.genus2$Rhodoplanes ~ bac.genus2$pH))
## W = 0.96651, p-value = 0.338
summary(lm(bac.genus2$Rhodoplanes~bac.genus2$pH))
##
## Call:
## lm(formula = bac.genus2$Rhodoplanes ~ bac.genus2$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.022416 -0.008384 -0.000337 0.006603 0.037603
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.062568 0.010722 5.835 1.41e-06 ***
## bac.genus2$pH -0.002454 0.002137 -1.149 0.259
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01308 on 34 degrees of freedom
## Multiple R-squared: 0.03735, Adjusted R-squared: 0.009034
## F-statistic: 1.319 on 1 and 34 DF, p-value: 0.2588
shapiro.test(residuals(lm(log(bac.genus2$DA101)~bac.genus2$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(log(bac.genus2$DA101) ~ bac.genus2$pH))
## W = 0.94418, p-value = 0.06861
summary(lm(log(bac.genus2$DA101)~bac.genus2$pH))
##
## Call:
## lm(formula = log(bac.genus2$DA101) ~ bac.genus2$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.38637 -0.81590 0.09182 0.91217 1.68340
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4393 0.9516 -7.818 4.24e-09 ***
## bac.genus2$pH 0.7550 0.1896 3.982 0.000341 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.161 on 34 degrees of freedom
## Multiple R-squared: 0.318, Adjusted R-squared: 0.2979
## F-statistic: 15.85 on 1 and 34 DF, p-value: 0.0003414
shapiro.test(residuals(lm(log(bac.genus2$Bradyrhizobium)~bac.genus2$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(log(bac.genus2$Bradyrhizobium) ~ bac.genus2$pH))
## W = 0.97584, p-value = 0.6046
summary(lm(log(bac.genus2$Bradyrhizobium)~bac.genus2$pH))
##
## Call:
## lm(formula = log(bac.genus2$Bradyrhizobium) ~ bac.genus2$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.00276 -0.40891 0.05477 0.35049 0.97719
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.07688 0.41983 -9.711 2.46e-11 ***
## bac.genus2$pH 0.02746 0.08366 0.328 0.745
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.512 on 34 degrees of freedom
## Multiple R-squared: 0.00316, Adjusted R-squared: -0.02616
## F-statistic: 0.1078 on 1 and 34 DF, p-value: 0.7447
shapiro.test(residuals(lm(log(bac.genus2$Candidatus_Solibacter)~bac.genus2$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(log(bac.genus2$Candidatus_Solibacter) ~ bac.genus2$pH))
## W = 0.9722, p-value = 0.4888
summary(lm(log(bac.genus2$Candidatus_Solibacter)~bac.genus2$pH))
##
## Call:
## lm(formula = log(bac.genus2$Candidatus_Solibacter) ~ bac.genus2$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86413 -0.28411 -0.00203 0.14321 0.91210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.75191 0.33062 -5.299 7.03e-06 ***
## bac.genus2$pH -0.45849 0.06589 -6.959 5.04e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4032 on 34 degrees of freedom
## Multiple R-squared: 0.5875, Adjusted R-squared: 0.5754
## F-statistic: 48.42 on 1 and 34 DF, p-value: 5.043e-08
shapiro.test(residuals(lm(log(bac.genus2$Candidatus_Xiphinematobacter)~bac.genus2$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(log(bac.genus2$Candidatus_Xiphinematobacter) ~ bac.genus2$pH))
## W = 0.96822, p-value = 0.379
summary(lm(log(bac.genus2$Candidatus_Xiphinematobacter)~bac.genus2$pH))
##
## Call:
## lm(formula = log(bac.genus2$Candidatus_Xiphinematobacter) ~ bac.genus2$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.41068 -0.55439 -0.00542 0.29822 1.34609
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.1238 0.5773 -3.679 0.000804 ***
## bac.genus2$pH -0.5415 0.1150 -4.707 4.11e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.704 on 34 degrees of freedom
## Multiple R-squared: 0.3945, Adjusted R-squared: 0.3767
## F-statistic: 22.16 on 1 and 34 DF, p-value: 4.105e-05
shapiro.test(residuals(lm(log(bac.genus2$Candidatus_Koribacter)~bac.genus2$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(log(bac.genus2$Candidatus_Koribacter) ~ bac.genus2$pH))
## W = 0.98371, p-value = 0.862
summary(lm(log(bac.genus2$Candidatus_Koribacter)~bac.genus2$pH))
##
## Call:
## lm(formula = log(bac.genus2$Candidatus_Koribacter) ~ bac.genus2$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.59878 -0.37855 -0.01199 0.36615 1.31598
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.3010 0.5092 -2.555 0.0153 *
## bac.genus2$pH -0.7152 0.1015 -7.048 3.88e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6211 on 34 degrees of freedom
## Multiple R-squared: 0.5937, Adjusted R-squared: 0.5817
## F-statistic: 49.68 on 1 and 34 DF, p-value: 3.881e-08
shapiro.test(residuals(lm(log(bac.genus2$Burkholderia)~bac.genus2$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(log(bac.genus2$Burkholderia) ~ bac.genus2$pH))
## W = 0.97527, p-value = 0.5855
summary(lm(log(bac.genus2$Burkholderia)~bac.genus2$pH))
##
## Call:
## lm(formula = log(bac.genus2$Burkholderia) ~ bac.genus2$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.00953 -0.38673 -0.05603 0.42936 0.97480
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.34420 0.44239 -7.559 8.86e-09 ***
## bac.genus2$pH -0.33371 0.08816 -3.785 0.000596 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5396 on 34 degrees of freedom
## Multiple R-squared: 0.2965, Adjusted R-squared: 0.2758
## F-statistic: 14.33 on 1 and 34 DF, p-value: 0.0005964
shapiro.test(residuals(lm(log(bac.genus2$Mycobacterium)~bac.genus2$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(log(bac.genus2$Mycobacterium) ~ bac.genus2$pH))
## W = 0.9797, p-value = 0.7353
summary(lm(log(bac.genus2$Mycobacterium)~bac.genus2$pH))
##
## Call:
## lm(formula = log(bac.genus2$Mycobacterium) ~ bac.genus2$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3513 -0.4097 -0.0918 0.5553 1.2193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.7983 0.5455 -10.630 2.39e-12 ***
## bac.genus2$pH 0.1538 0.1087 1.415 0.166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6653 on 34 degrees of freedom
## Multiple R-squared: 0.0556, Adjusted R-squared: 0.02783
## F-statistic: 2.002 on 1 and 34 DF, p-value: 0.1662
shapiro.test(residuals(lm(sqrt(bac.genus2$Salinispora)~bac.genus2$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(sqrt(bac.genus2$Salinispora) ~ bac.genus2$pH))
## W = 0.98181, p-value = 0.8048
summary(lm(sqrt(bac.genus2$Salinispora)~bac.genus2$pH))
##
## Call:
## lm(formula = sqrt(bac.genus2$Salinispora) ~ bac.genus2$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.059806 -0.013819 -0.002034 0.019920 0.076001
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.139580 0.022245 6.275 3.8e-07 ***
## bac.genus2$pH -0.018008 0.004433 -4.062 0.000271 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02713 on 34 degrees of freedom
## Multiple R-squared: 0.3268, Adjusted R-squared: 0.307
## F-statistic: 16.5 on 1 and 34 DF, p-value: 0.0002708
shapiro.test(residuals(lm(log(bac.genus2$Methylosinus)~bac.genus2$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(log(bac.genus2$Methylosinus) ~ bac.genus2$pH))
## W = 0.96348, p-value = 0.2747
summary(lm(log(bac.genus2$Methylosinus)~bac.genus2$pH))
##
## Call:
## lm(formula = log(bac.genus2$Methylosinus) ~ bac.genus2$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8997 -0.9741 0.0638 1.1118 2.3008
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.8265 1.2057 -6.491 2e-07 ***
## bac.genus2$pH 0.2623 0.2403 1.092 0.283
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.471 on 34 degrees of freedom
## Multiple R-squared: 0.03385, Adjusted R-squared: 0.005438
## F-statistic: 1.191 on 1 and 34 DF, p-value: 0.2827
#Suplementary Fig.4
data.genomesize.bahram<-read.csv("data/bahram/data.genomesize4.csv")
p1<-ggplot()+geom_jitter(data = data.genomesize.bahram,aes(x=pH,y=AGS),size=5)+
geom_smooth(data = data.genomesize.bahram,aes(x=pH,y=AGS),method = "lm")+
xlab("pH")+ylab("Average Genome Size (Mbp)")+theme_bw()+
theme(axis.text = element_text(size = 12,color = "black",face = "bold"),
axis.title = element_text(size = 12,color = "black",face = "bold"))+
annotate("text",x=7,y=6, label="rho = -0.43 ***",
size=5,color="black")+ggtitle("Based on GTDB Annotation")
p1
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).

p6<-ggplot()+geom_jitter(data = data.genomesize.bahram,aes(x=pH,y=AGS2_mgm),size=5)+
geom_smooth(data = data.genomesize.bahram,aes(x=pH,y=AGS2_mgm),method = "lm")+
xlab("pH")+ylab("Average Genome Size (Mbp)")+theme_bw()+
theme(axis.text = element_text(size = 12,color = "black",face = "bold"),
axis.title = element_text(size = 12,color = "black",face = "bold"))+
annotate("text",x=7,y=6, label="rho = -0.22 *",
size=5,color="black")+ggtitle("Based on Metagenome")
p6
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Removed 8 rows containing missing values (`geom_point()`).

#Supplementary Fig. 4
library(ggpubr)
ggarrange(p6,p1,ncol = 2,nrow = 1,
labels = c("a","b"),common.legend = TRUE,legend = "right")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Removed 8 rows containing missing values (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Removed 8 rows containing missing values (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Removed 8 rows containing missing values (`geom_point()`).

#ggsave("pdf/Fig.S4_genometraits_bahram.pdf",plot = last_plot(),units = "in",height = 3.5,width = 8,dpi = 600)
#Supplementary Fig. 5
summary(lm(env.genomeTraits$GC_percent~env.genomeTraits$AGS))
##
## Call:
## lm(formula = env.genomeTraits$GC_percent ~ env.genomeTraits$AGS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3771 -0.7417 0.1210 0.5994 3.0543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65.4820 0.7923 82.652 < 2e-16 ***
## env.genomeTraits$AGS -0.8438 0.1815 -4.649 4.88e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.161 on 34 degrees of freedom
## Multiple R-squared: 0.3886, Adjusted R-squared: 0.3706
## F-statistic: 21.61 on 1 and 34 DF, p-value: 4.88e-05
shapiro.test(residuals(lm(env.genomeTraits$GC_percent~env.genomeTraits$AGS)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.genomeTraits$GC_percent ~ env.genomeTraits$AGS))
## W = 0.98259, p-value = 0.8289
p12.1<-ggplot() + geom_jitter(data=env.genomeTraits, height = 0, aes(x= AGS, y=GC_percent, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.genomeTraits,aes(x= AGS, y=GC_percent), method ="lm", col= "white")+
labs(x = "Average Genome Size (Mbp)",y = "GC Content (%)")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"),name="Site.latitude")+
theme(legend.title = element_text(colour="black", size=12, face="bold"),
legend.text = element_text(colour="black", size=12, face="bold"),
axis.text=element_text(size=12,face="bold"),
axis.title=element_text(size=12,face="bold"))+
annotate("text",x=3,y=59, label="R2 = 0.371\nP = 4.88e-05",
size=5,color="black")+ggtitle("Based on Metagenome")
p12.1
## `geom_smooth()` using formula = 'y ~ x'

summary(lm(GC_otu~AGS_otu,genomesize_otu))
##
## Call:
## lm(formula = GC_otu ~ AGS_otu, data = genomesize_otu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0164537 -0.0058825 0.0000878 0.0043699 0.0143172
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.558234 0.041071 13.592 2.65e-15 ***
## AGS_otu 0.010524 0.007984 1.318 0.196
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007654 on 34 degrees of freedom
## Multiple R-squared: 0.04863, Adjusted R-squared: 0.02064
## F-statistic: 1.738 on 1 and 34 DF, p-value: 0.1962
shapiro.test(residuals(lm(GC_otu~AGS_otu,genomesize_otu)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(GC_otu ~ AGS_otu, genomesize_otu))
## W = 0.97653, p-value = 0.6276
p19<-ggplot() + geom_jitter(data=genomesize_otu, height = 0, aes(x= AGS_otu, y=GC_otu*100, color = site.latitude), alpha = 0.8, size =5) +
#geom_smooth(data=genomesize_otu,aes(x= AGS_otu, y=GC_otu*100), method ="lm", col= "white")+
labs(x = "Average Genome Size (Mbp)",y = "GC Percent (%)")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"),name="Site.latitude")+
theme(legend.title = element_text(colour="black", size=12, face="bold"),
legend.text = element_text(colour="black", size=12, face="bold"),
axis.text=element_text(size=12,face="bold"),
axis.title=element_text(size=12,face="bold"))+
annotate("text",x=5.4,y=60, label="R2 = 0.021\nP = 0.1962",size=5,color="black")+
ggtitle("Based on GTDB Annotation")
p19

#Supplementary Fig. 5
library(ggpubr)
ggarrange(p12.1,p19,ncol = 2,nrow = 1,
labels = c("a","b"),common.legend = TRUE,legend = "right")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("pdf/Fig.S5-1_genometraits_bahram.pdf",plot = last_plot(),units = "in",height = 3.5,width = 9,dpi = 600)
#Supplementary Fig. 6
summary(lm(gc_percent~AGS,data = data.genomesize.bahram))
##
## Call:
## lm(formula = gc_percent ~ AGS, data = data.genomesize.bahram)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0204301 -0.0041983 -0.0000698 0.0039563 0.0223992
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.590390 0.010844 54.444 < 2e-16 ***
## AGS 0.007965 0.001993 3.997 0.000103 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007065 on 140 degrees of freedom
## Multiple R-squared: 0.1024, Adjusted R-squared: 0.09601
## F-statistic: 15.97 on 1 and 140 DF, p-value: 0.0001034
shapiro.test(residuals(lm(gc_percent~AGS,data = data.genomesize.bahram)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(gc_percent ~ AGS, data = data.genomesize.bahram))
## W = 0.98831, p-value = 0.2776
p10<-ggplot()+geom_jitter(data = data.genomesize.bahram,aes(x=AGS2_mgm,y=GC_percent_mgm),size=5)+
geom_smooth(data = data.genomesize.bahram,aes(x=AGS2_mgm,y=GC_percent_mgm),method = "lm")+
ylab("GC Percent (%)")+xlab("Average Genome Size (Mbp)")+theme_bw()+
theme(axis.text = element_text(size = 12,color = "black",face = "bold"),
axis.title = element_text(size = 12,color = "black",face = "bold"))+
annotate("text",x=5,y=67, label="rho = -0.57 ***",
size=5,color="black")+ggtitle("Based on Metagenome")
p10
## `geom_smooth()` using formula = 'y ~ x'

summary(lm(gc_percent~AGS,data = data.genomesize.bahram))
##
## Call:
## lm(formula = gc_percent ~ AGS, data = data.genomesize.bahram)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0204301 -0.0041983 -0.0000698 0.0039563 0.0223992
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.590390 0.010844 54.444 < 2e-16 ***
## AGS 0.007965 0.001993 3.997 0.000103 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007065 on 140 degrees of freedom
## Multiple R-squared: 0.1024, Adjusted R-squared: 0.09601
## F-statistic: 15.97 on 1 and 140 DF, p-value: 0.0001034
shapiro.test(residuals(lm(gc_percent~AGS,data = data.genomesize.bahram)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(gc_percent ~ AGS, data = data.genomesize.bahram))
## W = 0.98831, p-value = 0.2776
p5<-ggplot()+geom_jitter(data = data.genomesize.bahram,aes(x=AGS,y=gc_percent*100),size=5)+
geom_smooth(data = data.genomesize.bahram,aes(x=AGS,y=gc_percent*100),method = "lm")+
ylab("GC Percent (%)")+xlab("Average Genome Size (Mbp)")+theme_bw()+
theme(axis.text = element_text(size = 12,color = "black",face = "bold"),
axis.title = element_text(size = 12,color = "black",face = "bold"))+
annotate("text",x=5,y=65, label="rho = 0.25 **",
size=5,color="black")+ggtitle("Based on GTDB Annotation")
p5
## `geom_smooth()` using formula = 'y ~ x'

#Supplementary Fig. 6
library(ggpubr)
ggarrange(p10,p5,ncol = 2,nrow = 1,
labels = c("a","b"),common.legend = TRUE,legend = "right")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("pdf/Fig.S5-2_genometraits_bahram.pdf",plot = last_plot(),units = "in",height = 3.5,width = 8,dpi = 600)
#Supplementary FIg.7
p1<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y=bac.amp.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.amp1,aes(x= pH, y=bac.amp.H), method ="lm", col= "white")+
labs(x = "Soil pH",y = "H'.16S")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=6.3, label="R2 = 0.481\nP = 1.653e-06",
size=5,color="black")
p1

#ggsave("pdf/Fig.1a.corr.pH.H16S.2023.04.18.pdf", width = 4, height = 3)
summary(lm(env.amp1$bac.amp.H~env.amp1$pH))
##
## Call:
## lm(formula = env.amp1$bac.amp.H ~ env.amp1$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40585 -0.16282 -0.04642 0.14106 0.50752
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.56163 0.18483 30.090 < 2e-16 ***
## env.amp1$pH 0.21297 0.03683 5.782 1.65e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2254 on 34 degrees of freedom
## Multiple R-squared: 0.4958, Adjusted R-squared: 0.481
## F-statistic: 33.43 on 1 and 34 DF, p-value: 1.653e-06
shapiro.test(residuals(lm(env.amp1$bac.amp.H~env.amp1$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$bac.amp.H ~ env.amp1$pH))
## W = 0.97818, p-value = 0.6835
p3<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y=ko.mgm.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.amp1,aes(x= pH, y=ko.mgm.H), col= "white")+
labs(x = "Soil pH",y = "H'.KO")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=7.9, label="R2 = 0.199\nP = 0.009",
size=5,color="black")
p3

#ggsave("pdf/Fig.1c.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
m2<-lm(env.amp1$ko.mgm.H ~ env.amp1$pH + I(env.amp1$pH^2) )
AIC(m2)
## [1] -158.4754
summary(m2)
##
## Call:
## lm(formula = env.amp1$ko.mgm.H ~ env.amp1$pH + I(env.amp1$pH^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.055544 -0.016888 0.003081 0.019042 0.055272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.104670 0.117085 69.221 <2e-16 ***
## env.amp1$pH -0.087095 0.045769 -1.903 0.0658 .
## I(env.amp1$pH^2) 0.007155 0.004317 1.657 0.1069
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02503 on 33 degrees of freedom
## Multiple R-squared: 0.245, Adjusted R-squared: 0.1992
## F-statistic: 5.354 on 2 and 33 DF, p-value: 0.009689
shapiro.test(residuals(m2))
##
## Shapiro-Wilk normality test
##
## data: residuals(m2)
## W = 0.95967, p-value = 0.21
p5<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y = rgi.mgm.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.amp1,aes(x= pH, y=rgi.mgm.H), method = "lm", color = "white")+
labs(x = "Soil pH",y = "H'.ARG")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=4.9, label="R2 = 0.190\nP = 0.004",
size=5,color="black")
p5

summary(lm(env.amp1$rgi.mgm.H~env.amp1$pH))
##
## Call:
## lm(formula = env.amp1$rgi.mgm.H ~ env.amp1$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.040812 -0.012858 0.000664 0.014349 0.049838
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.917865 0.017772 276.712 < 2e-16 ***
## env.amp1$pH -0.010755 0.003542 -3.037 0.00457 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02168 on 34 degrees of freedom
## Multiple R-squared: 0.2133, Adjusted R-squared: 0.1902
## F-statistic: 9.221 on 1 and 34 DF, p-value: 0.004571
shapiro.test(residuals(lm(env.amp1$rgi.mgm.H~env.amp1$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$rgi.mgm.H ~ env.amp1$pH))
## W = 0.98332, p-value = 0.8506
#ggsave("pdf2/Fig.2b.corr.pH.HARG.2022.12.29.pdf", width = 4, height = 3)
p7<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y = cazy.mgm.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.amp1,aes(x= pH, y=cazy.mgm.H), method = "lm", color = "white")+
labs(x = "Soil pH",y = "H'.Cazy")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=3.55, label="R2 = 0.833\nP = 5.52e-15",
size=5,color="black")
p7

summary(lm(env.amp1$cazy.mgm.H~env.amp1$pH))
##
## Call:
## lm(formula = env.amp1$cazy.mgm.H ~ env.amp1$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.033579 -0.011907 -0.002831 0.011534 0.057275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.729231 0.017529 212.74 < 2e-16 ***
## env.amp1$pH -0.046286 0.003493 -13.25 5.52e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02138 on 34 degrees of freedom
## Multiple R-squared: 0.8378, Adjusted R-squared: 0.833
## F-statistic: 175.6 on 1 and 34 DF, p-value: 5.52e-15
shapiro.test(residuals(lm(env.amp1$cazy.mgm.H~env.amp1$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$cazy.mgm.H ~ env.amp1$pH))
## W = 0.97247, p-value = 0.4966
#ggsave("pdf2/Fig.2c.corr.pH.HCazy.2022.12.29.pdf", width = 4, height = 3)
p1.1<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= round(ko.mgm.H,digits = 2), y=bac.amp.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.amp1,aes(x= ko.mgm.H, y=bac.amp.H), method ="lm", col= "white")+
labs(x = "H'.KO",y = "H'.16S")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=7.87,y=7.2, label="R2 = 0.294\nP = 0.00037",
size=5,color="black")
p1.1

#ggsave("pdf/Fig.1b.corr.pH.H16S.2023.04.18.pdf", width = 4, height = 3)
summary(lm(env.amp1$bac.amp.H~env.amp1$ko.mgm.H))
##
## Call:
## lm(formula = env.amp1$bac.amp.H ~ env.amp1$ko.mgm.H)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48005 -0.22545 0.00627 0.19960 0.63453
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.902 12.478 4.48 8.03e-05 ***
## env.amp1$ko.mgm.H -6.274 1.588 -3.95 0.000373 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2628 on 34 degrees of freedom
## Multiple R-squared: 0.3146, Adjusted R-squared: 0.2944
## F-statistic: 15.61 on 1 and 34 DF, p-value: 0.0003731
shapiro.test(residuals(lm(env.amp1$bac.amp.H~env.amp1$ko.mgm.H)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$bac.amp.H ~ env.amp1$ko.mgm.H))
## W = 0.97175, p-value = 0.4752
p3.1<-ggplot() + geom_jitter(data=env.genomeTraits, height = 0, aes(x= AGS, y=round(ko.mgm.H,digits = 2), color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.genomeTraits,aes(x= AGS, y=ko.mgm.H), method = "lm",col= "white")+
labs(x = "Average Genome Size (Mbp)",y = "H'.KO")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6,y=7.89, label="R2 = 0.0006\nP = 0.886",
size=5,color="black")
p3.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$ko.mgm.H~env.genomeTraits$AGS))
##
## Call:
## lm(formula = env.genomeTraits$ko.mgm.H ~ env.genomeTraits$AGS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.057973 -0.019978 -0.003867 0.020544 0.049300
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.8596291 0.0193594 405.986 <2e-16 ***
## env.genomeTraits$AGS -0.0006406 0.0044351 -0.144 0.886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02837 on 34 degrees of freedom
## Multiple R-squared: 0.0006132, Adjusted R-squared: -0.02878
## F-statistic: 0.02086 on 1 and 34 DF, p-value: 0.886
shapiro.test(residuals(lm(env.genomeTraits$ko.mgm.H~env.genomeTraits$AGS)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.genomeTraits$ko.mgm.H ~ env.genomeTraits$AGS))
## W = 0.96903, p-value = 0.3996
p5.1<-ggplot() + geom_jitter(data=env.genomeTraits, height = 0, aes(x= AGS, y=round(rgi.mgm.H,digits = 2), color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.genomeTraits,aes(x= AGS, y=rgi.mgm.H), method = "lm",col= "white")+
labs(x = "Average Genome Size (Mbp)",y = "H'.ARG")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6,y=4.9, label="R2 = 0.239\nP = 0.001",
size=5,color="black")
p5.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$rgi.mgm.H~env.genomeTraits$AGS))
##
## Call:
## lm(formula = env.genomeTraits$rgi.mgm.H ~ env.genomeTraits$AGS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.050940 -0.013002 0.003482 0.014833 0.037889
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.816838 0.014333 336.063 < 2e-16 ***
## env.genomeTraits$AGS 0.011384 0.003284 3.467 0.00145 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02101 on 34 degrees of freedom
## Multiple R-squared: 0.2612, Adjusted R-squared: 0.2395
## F-statistic: 12.02 on 1 and 34 DF, p-value: 0.001447
shapiro.test(residuals(lm(env.genomeTraits$rgi.mgm.H~env.genomeTraits$AGS)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.genomeTraits$rgi.mgm.H ~ env.genomeTraits$AGS))
## W = 0.96708, p-value = 0.3513
p7.1<-ggplot() + geom_jitter(data=env.genomeTraits, height = 0, aes(x= AGS, y=cazy.mgm.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.genomeTraits,aes(x= AGS, y=cazy.mgm.H), method = "lm",col= "white")+
labs(x = "Average Genome Size (Mbp)",y = "H'.Cazy")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6,y=3.6, label="R2 = 0.322\nP = 0.0001",
size=5,color="black")
p7.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$cazy.mgm.H~env.genomeTraits$AGS))
##
## Call:
## lm(formula = env.genomeTraits$cazy.mgm.H ~ env.genomeTraits$AGS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.076645 -0.025371 0.006949 0.026161 0.081067
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.382131 0.029386 115.1 < 2e-16 ***
## env.genomeTraits$AGS 0.028276 0.006732 4.2 0.000182 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04307 on 34 degrees of freedom
## Multiple R-squared: 0.3416, Adjusted R-squared: 0.3222
## F-statistic: 17.64 on 1 and 34 DF, p-value: 0.0001819
shapiro.test(residuals(lm(env.genomeTraits$cazy.mgm.H~env.genomeTraits$AGS)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.genomeTraits$cazy.mgm.H ~ env.genomeTraits$AGS))
## W = 0.97231, p-value = 0.4918
#Supplementary FIg.7
library(ggpubr)
ggarrange(p1,p3,p5,p7,
p1.1,p3.1,p5.1,p7.1,
ncol = 4,nrow = 2,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
labels = c("a","b","c","d","e","f","g","h"))

#ggsave("pdf/Supplementary.Fig2_richness&diversity.pdf",plot = last_plot(),units = "in",height = 7,width = 16,dpi = 600)
#Supplementary Fig. 8
library(linkET)
library(dplyr)
colnames(bac.amp1) == colnames(ko.mgm1)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
bac.amp2<-data.frame(t(bac.amp1))
bac.amp2<-data.frame(apply(bac.amp2,1,function(x){x/sum(x)}))
ko.mgm2<-data.frame(t(ko.mgm1))
ko.mgm2<-data.frame(apply(ko.mgm2,1,function(x){x/sum(x)}))
bac.ko<-cbind(data.frame(t(bac.amp1)),data.frame(t(ko.mgm1)))
env.mantel<-env.genomeTraits[,c("AGS","ACN","GC_percent","Growthrate",
"bac.amp.S","bac.amp.H","ko.mgm.S","ko.mgm.H",
"latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N",
"C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]
colnames(env.mantel)<-c("AGS","ACN","GC content","Doubling time",
"S.16S","H'.16S","S.KO","H'.KO",
"Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
#plot
mantel <- mantel_test(bac.ko, env.mantel,
spec_select = list(BacteriaCom = 1:22579,
FunctionCom = 22580:33644),
spec_dist = dist_func(.FUN = "vegdist", method = "bray"),
env_dist = dist_func(.FUN = "vegdist", method = "euclidean")) %>%
mutate(rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf),
labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")),
pd = cut(p, breaks = c(-Inf, 0.01, 0.05, Inf),
labels = c("< 0.01", "0.01 - 0.05", ">= 0.05")))
qcorrplot(correlate(env.mantel,method = "spearman"),type = "lower", diag = FALSE,
grid_size = 0.25) +
geom_square() +
geom_mark(sep = '\n',size=3,sig_level = c(0.05,0.01,0.001),sig_thres = 0.05)+
geom_couple(aes(colour = pd, size = rd), data = mantel, curvature = 0.1) +
scale_fill_gradientn(colours = RColorBrewer::brewer.pal(11, "RdYlBu")) +
#scale_fill_gradient(guide = "legend", high='#2c7bb6', low='#d7191c',name="rho")+
scale_size_manual(values = c(0.5, 1, 2)) +
scale_colour_manual(values = c("#f6e8c3", "#c7eae5", "#A2A2A288")) +
#scale_colour_manual(values = color_pal(3, 0)) +
guides(size = guide_legend(title = "Mantel's r",
override.aes = list(colour = "grey35"),
order = 2),
colour = guide_legend(title = "Mantel's p",
override.aes = list(size = 3),
order = 1),
fill = guide_colorbar(title = "Spearman's rho", order = 3))

#Supplementary Fig. 9
p1<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y=bac.mgm.H, color = site.latitude), alpha = 0.8, size =5) +
#geom_smooth(data=env.amp1,aes(x= pH, y=bac.mgm.H), method ="lm", col= "white")+
labs(x = "Soil pH",y = "H'.Bacteria.mgm")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=6.3, label="R2 = 0.009\nP = 0.253",
size=5,color="black")
p1

#ggsave("pdf2/Fig.S2A.corr.pH.H16S.2023.04.18.pdf", width = 6, height = 4)
summary(lm(env.amp1$bac.mgm.H~env.amp1$pH))
##
## Call:
## lm(formula = env.amp1$bac.mgm.H ~ env.amp1$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63980 -0.18019 0.01608 0.20693 0.48888
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.34786 0.23546 26.960 <2e-16 ***
## env.amp1$pH 0.05446 0.04692 1.161 0.254
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2872 on 34 degrees of freedom
## Multiple R-squared: 0.03812, Adjusted R-squared: 0.009826
## F-statistic: 1.347 on 1 and 34 DF, p-value: 0.2538
shapiro.test(residuals(lm(env.amp1$bac.mgm.H~env.amp1$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$bac.mgm.H ~ env.amp1$pH))
## W = 0.98188, p-value = 0.807
library(lme4)
## 载入需要的程辑包:Matrix
## Warning: 程辑包'Matrix'是用R版本4.2.2 来建造的
##
## 载入程辑包:'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(lmerTest)
##
## 载入程辑包:'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
lm<-lmer(bac.amp.H~1+pH+(1|latitude),data = env.amp1,
REML = FALSE,control=lmerControl(optimizer="bobyqa"))
shapiro.test(residuals(lm))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm)
## W = 0.97453, p-value = 0.5614
summary(lm)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: bac.amp.H ~ 1 + pH + (1 | latitude)
## Data: env.amp1
## Control: lmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## -0.6 5.8 4.3 -8.6 32
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8410 -0.6192 -0.1238 0.4461 1.9018
##
## Random effects:
## Groups Name Variance Std.Dev.
## latitude (Intercept) 0.009959 0.09979
## Residual 0.038044 0.19505
## Number of obs: 36, groups: latitude, 12
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.57559 0.21061 13.64329 26.47 4.01e-13 ***
## pH 0.21013 0.04194 13.72478 5.01 0.000203 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## pH -0.979
tab_model(lm)
|
|
bac.amp.H
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
5.58
|
5.15 – 6.00
|
<0.001
|
|
pH
|
0.21
|
0.12 – 0.30
|
<0.001
|
|
Random Effects
|
|
σ2
|
0.04
|
|
τ00 latitude
|
0.01
|
|
ICC
|
0.21
|
|
N latitude
|
12
|
|
Observations
|
36
|
|
Marginal R2 / Conditional R2
|
0.496 / 0.601
|
p2<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y=bac.mgm.S, color = site.latitude), alpha = 0.8, size =5) +
#geom_smooth(data=env.amp1,aes(x= pH, y=bac.mgm.S), method ="loess", col= "white")+
labs(x = "Soil pH",y = "S.Bacteria.mgm")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=33500, label="R2 = 0.0184\nP = 0.429",
size=5,color="black")
p2

#ggsave("pdf2/Fig.S2B.corr.pH.H16S.2023.04.18.pdf", width = 6, height = 4)
summary(lm(env.amp1$bac.mgm.S~env.amp1$pH))
##
## Call:
## lm(formula = env.amp1$bac.mgm.S ~ env.amp1$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -827.16 -280.83 0.17 155.35 958.18
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33869.01 322.51 105.0 <2e-16 ***
## env.amp1$pH 51.42 64.27 0.8 0.429
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 393.3 on 34 degrees of freedom
## Multiple R-squared: 0.01848, Adjusted R-squared: -0.01039
## F-statistic: 0.6402 on 1 and 34 DF, p-value: 0.4292
shapiro.test(residuals(lm(env.amp1$bac.mgm.S~env.amp1$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$bac.mgm.S ~ env.amp1$pH))
## W = 0.96964, p-value = 0.4158
#Supplementary Fig. 10
env.amp1$sample_name==env.genomeTraits$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(env.amp1[,c("bac.mgm.H","bac.mgm.S","bac.amp.H","bac.amp.S", "ko.mgm.H","ko.mgm.S","rgi.mgm.H","rgi.mgm.S", "cazy.mgm.H","cazy.mgm.S")])
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(r)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(cor.out$r, 2)
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
cor.out$Y <- factor(cor.out$Y, levels = c("bac.mgm.H","bac.mgm.S","bac.amp.H","bac.amp.S", "ko.mgm.H","ko.mgm.S","rgi.mgm.H","rgi.mgm.S", "cazy.mgm.H","cazy.mgm.S"))
cor.out$Y <- factor(cor.out$Y, labels = c("H'.bacteria.mgm","S.bacteria.mgm","H'.16S","S.16S","H'.KO","S.KO","H'.ARG","S.ARG","H'.Cazy","S.Cazy"))
ggplot(cor.out, aes(Y,X)) +
geom_tile(aes(fill = r), size=1)+
geom_text(aes(label = r), size = 1.5)+
#scale_fill_gradient(guide = "legend", high='green', low='blue')+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
theme(axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=8, face="bold",angle = 20),
axis.text.y=element_text(colour="black", size=8, face="bold"))

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
p4<-ggplot(cor.out.sig, aes(Y,X)) +
geom_tile(aes(fill = r), size=1)+
geom_text(data= cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 2.5, font="bold")+theme_bw()+
#scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=12, face="bold",angle = 90, hjust=1,vjust = 0.5),
axis.text.y=element_text(colour="black", size=12, face="bold"))
p4

#ggsave("pdf/Supplementary_Fig.9.pdf", width = 8, height = 6)
#Supplementary Fig. 11
p1<-ggplot() + geom_jitter(data=env.genomeTraits, height = 0, aes( x=bac.amp.S,y= AGS, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.genomeTraits,aes(y= AGS, x=bac.amp.S), method ="lm", col= "white")+
labs(y = "Average Genome Size (Mbp)",x = "S.16S")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=4000,y=6.5, label="R2 = 0.187\nP = 0.0049",
size=5,color="black")
p1
## `geom_smooth()` using formula = 'y ~ x'

summary(lm(env.genomeTraits$AGS~env.genomeTraits$bac.amp.S))
##
## Call:
## lm(formula = env.genomeTraits$AGS ~ env.genomeTraits$bac.amp.S)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.74244 -0.56667 -0.09104 0.62071 1.85780
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.8181785 0.8744669 7.797 4.5e-09 ***
## env.genomeTraits$bac.amp.S -0.0007490 0.0002489 -3.009 0.00491 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.975 on 34 degrees of freedom
## Multiple R-squared: 0.2103, Adjusted R-squared: 0.1871
## F-statistic: 9.054 on 1 and 34 DF, p-value: 0.004911
shapiro.test(residuals(lm(env.genomeTraits$AGS~env.genomeTraits$bac.amp.S)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.genomeTraits$AGS ~ env.genomeTraits$bac.amp.S))
## W = 0.97275, p-value = 0.5052
p2<-ggplot() + geom_jitter(data=env.genomeTraits, height = 0, aes( x=bac.amp.H,y= AGS, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.genomeTraits,aes(y= AGS, x=bac.amp.H), method ="lm", col= "white")+
labs(y = "Average Genome Size (Mbp)",x = "H'.16S")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=7,y=6.5, label="R2 = 0.222\nP = 0.0021",
size=5,color="black")
p2
## `geom_smooth()` using formula = 'y ~ x'

summary(lm(env.genomeTraits$AGS~env.genomeTraits$bac.amp.H))
##
## Call:
## lm(formula = env.genomeTraits$AGS ~ env.genomeTraits$bac.amp.H)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6137 -0.5946 -0.1699 0.5811 2.0425
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.5164 3.4087 4.552 6.49e-05 ***
## env.genomeTraits$bac.amp.H -1.7076 0.5153 -3.314 0.00219 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9539 on 34 degrees of freedom
## Multiple R-squared: 0.2441, Adjusted R-squared: 0.2219
## F-statistic: 10.98 on 1 and 34 DF, p-value: 0.002193
shapiro.test(residuals(lm(env.genomeTraits$AGS~env.genomeTraits$bac.amp.H)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.genomeTraits$AGS ~ env.genomeTraits$bac.amp.H))
## W = 0.96019, p-value = 0.2179
#Supplementary Fig. 11
library(ggpubr)
ggarrange(p1,p2,
ncol = 2,nrow = 1,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
labels = c("a","b"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("pdf/Supplementary.Fig.3_16S&AGS.pdf",plot = last_plot(),units = "in",height = 3.5,width = 8,dpi = 600)
#Supplementary Fig. 12a
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(cog.mgm1))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:480] 0.2 0.15 0.03 -0.43 -0.12 -0.08 -0.01 0.24 0.58 0.49 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"COGs"
ggplot(cor.out.sig, aes(Y,X)) +
facet_grid(.~ Y_2, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_text(data= cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 2, font="bold")+scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(axis.title= element_blank(),
strip.text = element_text(colour="black",size = 12),
axis.text.x=element_text(colour="black", size=12, face="bold"),
axis.text.y=element_text(colour="black", size=12, face="bold"))

#ggsave("pdf/Supplementary.Fig10a.env.cog.genes.2023.01.05.pdf", width = 10, height = 4)
#Supplementary Fig. 12b
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.mgm1))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:221300] -0.06 -0.01 0.25 0.13 -0.03 -0.19 -0.28 -0.33 -0.31 -0.2 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"All annotated KOs"
ggplot(cor.out.sig, aes(Y,X)) +
facet_grid(.~ Y_2, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 12,face="bold", color = c("black")),
#strip.background = element_rect(fill = "grey"),
panel.spacing = unit(0.1, "lines"),
axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=15, face="bold"))

#ggsave("pdf/Supplementary.Fig10b.corrHeatmap.env.ko.genes.2023.01.05.pdf", width = 20, height = 5)
#Supplementary Fig. 13
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="GBM",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Arabinogalactan.biosynthesis.Mycobacterium"
## [2] "Exopolysaccharide.biosynthesis"
## [3] "Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate"
## [4] "Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin"
## [5] "Glycosaminoglycan.degradation"
## [6] "Glycosphingolipid.biosynthesis.ganglio.series"
## [7] "Glycosphingolipid.biosynthesis.globo.and.isoglobo.series"
## [8] "Glycosphingolipid.biosynthesis.lacto.and.neolacto.series"
## [9] "Glycosylphosphatidylinositol.anchor.biosynthesis"
## [10] "Lipoarabinomannan.biosynthesis"
## [11] "Lipopolysaccharide.biosynthesis"
## [12] "Mannose.type.O.glycan.biosynthesis"
## [13] "N.Glycan.biosynthesis"
## [14] "O.Antigen.nucleotide.sugar.biosynthesis"
## [15] "O.Antigen.repeat.unit.biosynthesis"
## [16] "Other.glycan.degradation"
## [17] "Other.types.of.O.glycan.biosynthesis"
## [18] "Peptidoglycan.biosynthesis"
## [19] "Teichoic.acid.biosynthesis"
## [20] "Various.types.of.N.glycan.biosynthesis"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, levels = c( "Arabinogalactan.biosynthesis.Mycobacterium", "Exopolysaccharide.biosynthesis", "Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate",
"Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin", "Glycosaminoglycan.degradation", "Glycosphingolipid.biosynthesis.ganglio.series",
"Glycosphingolipid.biosynthesis.globo.and.isoglobo.series", "Glycosphingolipid.biosynthesis.lacto.and.neolacto.series", "Glycosylphosphatidylinositol.anchor.biosynthesis",
"Lipoarabinomannan.biosynthesis", "Lipopolysaccharide.biosynthesis", "O.Antigen.nucleotide.sugar.biosynthesis",
"O.Antigen.repeat.unit.biosynthesis", "Mannose.type.O.glycan.biosynthesis",
"N.Glycan.biosynthesis","Peptidoglycan.biosynthesis","Other.glycan.degradation", "Other.types.of.O.glycan.biosynthesis", "Various.types.of.N.glycan.biosynthesis",
"Teichoic.acid.biosynthesis"))
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "ABM", "Exopolysaccharide.biosynthesis", "C",
"H", "GD", "S",
"GI", "L", "A",
"Lipoarabinomannan", "Lipopolysaccharide.biosynthesis", "O.Antigen.nucleotide.sugar.biosynthesis", "ARUB","M.G",
"N.Glycan", "Peptidoglycan.biosynthesis",
"Other.glycan.D", "O", "VN.glycanB",
"Teichoic.acid.biosyn"))
p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=18, face="bold")) +
labs(caption="ABM: Arabinogalactan.biosynthesis.Mycobacterium;C: Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate; H: Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin; GD: Glycosaminoglycan.degradation; S: Glycosphingolipid.biosynthesis.ganglio.series; GI: Glycosphingolipid.biosynthesis.globo.and.isoglobo.series; L: Glycosphingolipid.biosynthesis.lacto.and.neolacto.series;
A: Glycosylphosphatidylinositol.anchor.biosynthesis; Lipoarabinomannan: Lipoarabinomannan.biosynthesis;ARUB: O.Antigen.repeat.unit.biosynthesis; M.G: Mannose.type.O.glycan.biosynthesis; N.Glycan: N.Glycan.biosynthesis; Other.glycan.D: Other.glycan.degradation; O: Other.types.of.O.glycan.biosynthesis; VN.glycanB: Various.types.of.N.glycan.biosynthesis,Teichoic.acid.biosyn: Teichoic.acid.biosynthesis" )
p1

#ggsave("pdf2/Fig.S6a.corrHeatmap.env.Glycan.2023.01.05.pdf", width = 30, height = 7)
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(cazy.mgm1))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:2780] 0.24 0.18 -0.22 -0.48 -0.11 0.09 0.12 0.35 0.51 0.61 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"Carbohydrate-active enzymes (CAZy)"
p2<-ggplot(cor.out.sig, aes(Y,X)) +
facet_grid(.~ Y_2, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_text(data= cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 1.5, font="bold")+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 12,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=12, face="bold",angle = 90, hjust=1,vjust = 0.5),
axis.text.y=element_text(colour="black", size=18, face="bold"))
p2

#ggsave("pdf2/Fig.S6b.corrHeatmap.env.cazy.genes.2023.01.05.pdf", width = 20, height = 6)
#Supplementary Fig. 13
library(ggpubr)
ggarrange(p1,p2,
ncol = 1,nrow = 2,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
labels = c("a","b"))

#ggsave("pdf/Supplementary.Fig.S12.pdf",plot = last_plot(),units = "in",height = 13,width = 30,dpi = 600)
#Supplementary Fig. 14
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="CM",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Bacterial.chemotaxis" "Flagellar.assembly"
## [3] "Regulation.of.actin.cytoskeleton"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Bacterial chemotaxis", "Flagellar assembly","RAC"))
p1<- ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=10, face="bold",angle = 90, hjust=1,vjust = 0.5),
axis.text.y=element_text(colour="black", size=15, face="bold")) +
labs(caption="RAC: Regulation of actin cytoskeleton")
p1

#ggsave("pdf2/Fig.S7a.Heatmap.KEGG.cellular.motility.2023.01.05.pdf", width = 20, height = 6)
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="Two component system",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Two.component.system"
#cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Bacterial chemotaxis", "Flagellar assembly","RAC"))
p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 12,face="bold",color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=4, face="bold",angle = 90, hjust=1,vjust = 0.5),
axis.text.y=element_text(colour="black", size=15, face="bold"))
p2

#ggsave("pdf2/Fig.S7b.corrHeatmap.env.Signal.transduction.2023.01.05.pdf", width = 20, height = 6)
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:83460] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility", "Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Cellular.community.prokaryotes","Pentose.and.glucuronate.interconversions",
"Translation", "Energy.metabolism", "Membrane.transport" , "Folding.sorting.and.degradation",
"Metabolism.of.cofactors.and.vitamins" ))
cor.out$Y_4 <- factor(cor.out$Y_4, labels = c("BSS","CM", "XenoBDM","Signal transduction","Cellular community","PGI",
"Translation", "Energy metabolism", "Membrane transport" , "FSD",
"Metabolism of cofactors and vitamins" ))
cor.out.sub<-cor.out[cor.out$Y_4=="Cellular community",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Biofilm.formation" "Quorum.sensing"
p3<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 12,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=4, face="bold",angle = 90, hjust=1),
axis.text.y=element_text(colour="black", size=15, face="bold"))
p3

#ggsave("pdf2/Fig.S7c.corrHeatmap.env.Cellular.community.2023.01.05.pdf", width = 20, height = 6)
#Supplementary Fig. 14
library(ggpubr)
ggarrange(p1,p2,p3,
ncol = 1,nrow = 3,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
labels = c("a","b","c"))

#ggsave("pdf/Supplementary.Fig.13.pdf",plot = last_plot(),units = "in",height = 18,width = 20,dpi = 600)
#Supplementary Fig. 15
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="MTP",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Biosynthesis.of.12.14.and.16.membered.macrolides"
## [2] "Biosynthesis.of.ansamycins."
## [3] "Biosynthesis.of.enediyne.antibiotics."
## [4] "Biosynthesis.of.siderophore.group.nonribosomal.peptides."
## [5] "Biosynthesis.of.type.II.polyketide.backbone."
## [6] "Biosynthesis.of.type.II.polyketide.products."
## [7] "Biosynthesis.of.vancomycin.group.antibiotics."
## [8] "Carotenoid.biosynthesis."
## [9] "Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis."
## [10] "Geraniol.degradation."
## [11] "Insect.hormone.biosynthesis."
## [12] "Limonene.and.pinene.degradation."
## [13] "Monoterpenoid.biosynthesis."
## [14] "Nonribosomal.peptide.structures."
## [15] "Polyketide.sugar.unit.biosynthesis."
## [16] "Sesquiterpenoid.and.triterpenoid.biosynthesis."
## [17] "Terpenoid.backbone.biosynthesis."
## [18] "Tetracycline.biosynthesis."
## [19] "Type.I.polyketide.structures."
## [20] "Zeatin.biosynthesis."
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, levels = c( "Biosynthesis.of.12.14.and.16.membered.macrolides", "Biosynthesis.of.ansamycins.", "Biosynthesis.of.enediyne.antibiotics.",
"Biosynthesis.of.siderophore.group.nonribosomal.peptides.", "Biosynthesis.of.type.II.polyketide.backbone.", "Biosynthesis.of.type.II.polyketide.products.",
"Biosynthesis.of.vancomycin.group.antibiotics.", "Carotenoid.biosynthesis.", "Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis.",
"Geraniol.degradation.", "Insect.hormone.biosynthesis.", "Limonene.and.pinene.degradation.",
"Monoterpenoid.biosynthesis.", "Nonribosomal.peptide.structures.",
"Polyketide.sugar.unit.biosynthesis.","Sesquiterpenoid.and.triterpenoid.biosynthesis.","Terpenoid.backbone.biosynthesis.", "Tetracycline.biosynthesis.", "Type.I.polyketide.structures.",
"Zeatin.biosynthesis."))
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "BMM", "Biosynthesis.of.ansamycins", "Biosynthesis.of.enediyne.antibiotics",
"Biosynthesis.of.siderophore", "BPB", "Biosyn.polyketide",
"Biosyn.vancomycin", "Carotenoid.biosynthesis", "D","Geraniol.D",
"IH", "LPD", "M", "Nonribosomal.P","Polyketide.sugar.unit.biosynthesis",
"STB", "Terpenoid.backbone.biosynthesis",
"TB", "Type.I.polyketide.structures", "ZB"))
p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=18, face="bold")) +
labs(caption="BBM: Biosynthesis.of.12.14.and.16.membered.macrolides;Biosynthesis.of.siderophore: Biosynthesis.of.siderophore.group.nonribosomal.peptides; BPB: Biosynthesis.of.type.II.polyketide.backbone; Biosyn.polyketide: Biosynthesis.of.type.II.polyketide.products; Biosyn.vancomycin: Biosynthesis.of.vancomycin.group.antibiotics;
D: Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis; Geraniol.D: Geraniol.degradation; IH: Insect.hormone.biosynthesis; LPD: Limonene.and.pinene.degradation; M: Monoterpenoid.biosynthesis; Nonribosomal.P: Nonribosomal.peptide.structures; TB: Terpenoid.backbone.biosynthesis; ZB: Zeatin.biosynthesis" )
p1

#ggsave("pdf2/Fig.S8a.corrHeatmap.env.Metabolism.Terpenoid.2023.01.05.pdf", width = 30, height = 7)
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="XenoBDM",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Aminobenzoate.degradation"
## [2] "Atrazine.degradation"
## [3] "Benzoate.degradation"
## [4] "Bisphenol.degradation"
## [5] "Caprolactam.degradation"
## [6] "Chloroalkane.and.chloroalkene.degradation"
## [7] "Chlorocyclohexane.and.chlorobenzene.degradation"
## [8] "Dioxin.degradation"
## [9] "Drug.metabolism.cytochrome.P450"
## [10] "Drug.metabolism.other.enzymes"
## [11] "Ethylbenzene.degradation"
## [12] "Fluorobenzoate.degradation"
## [13] "Furfural.degradation"
## [14] "Metabolism.of.xenobiotics.by.cytochrome.P450"
## [15] "Naphthalene.degradation"
## [16] "Nitrotoluene.degradation"
## [17] "Polycyclic.aromatic.hydrocarbon.degradation"
## [18] "Steroid.degradation"
## [19] "Styrene.degradation"
## [20] "Toluene.degradation"
## [21] "Xylene.degradation"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "Aminobenzoate.degradation", "Atrazine.degradation", "Benzoate.degradation",
"Bisphenol.degradation", "Caprolactam.degradation", "Chloroalkane.and.chloroalkene.degradation",
"Chlorocyclohexane.and.chlorobenzene.degradation", "Dioxin.degradation", "Drug.metabolism.cytochrome.P450",
"Drug.metabolism.other.enzymes", "Ethylbenzene.degradation", "Fluorobenzoate.degradation",
"Furfural.degradation", "Metabolism.of.xenobiotics.by.cytochrome.P450",
"Naphthalene.degradation", "Nitrotoluene.degradation", "Polycyclic.aromatic.hydrocarbon.degradation",
"Steroid.degradation", "Styrene.degradation", "Toluene.degradation",
"Xylene.degradation"))
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "Aminobenzoate.degradation", "AtrD", "Benzoate.degradation",
"B", "CapD", "ChloroalkaneD",
"ChlcbD", "DioxinD", "Dc",
"DrugO", "EbD", "FbD",
"FfD", "Mxc", "NaD",
"NiD", "PcahD", "SteroidD",
"StyreneD", "TolueneD", "XyleneD"))
p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=18, face="bold")) +
labs(caption="AtrD: Atrazine.degradation; B: Bisphenol.degradation; CapD: Caprolactam.degradation; ChlcbD: Chloroalkane.and.chloroalkene.degradation; ChlorocyclohexaneD: Chlorocyclohexane.and.chlorobenzene.degradation; DioxinD: Dioxin.degradation; Dc: Drug.metabolism.cytochrome.P450.; DrugO: Drug.metabolism.other.enzymes; EbD: Ethylbenzene.degradation; FbD: Fluorobenzoate.degradation; FfD: Furfural.degradation;
Mxc: Metabolism.of.xenobiotics.by.cytochrome; NaD: Naphthalene.degradation; NiD: Nitrotoluene.degradation; PcahD: Polycyclic.aromatic.hydrocarbon.degradation; SD: Steroid.degradation; StyreneD: Styrene.degradation; TolueneD: Toluene.degradation; XyleneD: Xylene.degradation" )
p2

#ggsave("pdf2/Fig.S8b.corrHeatmap.env.XenoBDM.2023.01.05.pdf", width = 30, height = 7)
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(rgi.mgm1))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:21120] -0.38 -0.28 -0.28 0.17 0.48 -0.23 -0.1 -0.19 -0.32 -0.35 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"Antibiotic resistance genes (ARG)"
p3<-ggplot(cor.out.sig, aes(Y,X)) +
facet_grid(.~ Y_2, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"),
axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=18, face="bold"))
p3

#ggsave("pdf2/Fig.S8c.corrHeatmap.env.ARG.genes.2023.01.09.pdf", width = 30, height = 7)
#Supplementary Fig. 15
library(ggpubr)
ggarrange(p1,p2,p3,
ncol = 1,nrow = 3,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
labels = c("a","b","c"))

#ggsave("pdf/Supplementary.Fig.14.pdf",plot = last_plot(),units = "in",height = 21,width = 30,dpi = 600)
#Supplementary Fig. 16
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="Energy metabolism",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Carbon.fixation.pathways.in.prokaryotes"
## [2] "Methane.metabolism"
## [3] "Nitrogen.metabolism"
## [4] "Oxidative.phosphorylation"
## [5] "Sulfur.metabolism"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Carbon.fixation", "Methane.metabolism", "Nitrogen.M", "Oxidative.phosphorylation", "Sulfur.metabolism" ))
p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "blue"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=8, face="bold")) +
labs(caption="Nitrogen.M: Nitrogen metabolism")
p1

#ggsave("pdf2/Fig.S9a.corrHeatmap.env.Energy.metabolism.2023.01.05.pdf", width = 10, height = 3)
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="Membrane transport",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "ABC.transporters" "Phosphotransferase.system"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("ABC.transporters" , "PS" ))
p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "blue"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=8, face="bold")) +
labs(caption="PS: Phosphotransferase.system")
p2

#ggsave("pdf2/Fig.S9b.corrHeatmap.env.Membrane.transport.2023.01.05.pdf", width = 10, height = 3)
#Supplementary Fig. 16
library(ggpubr)
ggarrange(p1,p2,
ncol = 1,nrow = 2,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
labels = c("a","b"))

#ggsave("pdf/Supplementary.Fig.15.pdf",plot = last_plot(),units = "in",height = 6,width = 10,dpi = 600)
#Supplementary Fig. 17
rm(list = ls())
load("data/CForBio.data.prep.2022.11.22.RData")
library(vegan)
cutoff <- 1000
sort(colSums(bac.amp1))
## BD1623 DL1904 NG14 XS0601 BTM1515 GT0910 NG7 BD1512 TT1419 CB2424
## 24903 26972 27053 27234 29143 31098 33452 34090 34339 34770
## GT2515 TT0302 DHS0824 HS1433 HS1531 TT1201 HS2325 BTM2317 GT0206 DHS0911
## 34826 35210 35306 35369 35611 35751 36395 36829 36923 37867
## LS11 DHS0808 BTM1323 NG8 CB2009 XS0809 DL0810 LS9 DL0906 XS2022
## 39954 41573 42171 42958 43395 45875 45977 46174 46676 47364
## GH1 GH4 BD1203 LS15 CB0122 GH2
## 48535 54597 55685 60392 65051 65829
sort(colSums(bac.mgm1))
## HS1433 BD1623 NG7 XS2022 DHS0824 DHS0808 DL1904 BTM1323
## 9115574 9597402 9667544 9737388 9875341 10013817 10016208 10110020
## TT1419 HS2325 GT0910 DL0810 GT2515 GT0206 TT0302 NG8
## 10160188 10164701 10195067 10301253 10395324 10534807 10643744 10648853
## XS0809 HS1531 LS11 CB2424 BTM2317 XS0601 GH1 DL0906
## 10675545 10684100 10970718 11115704 11143274 11320447 11461977 11544870
## BD1512 GH2 NG14 DHS0911 CB0122 CB2009 BD1203 TT1201
## 11738899 11750163 11847992 11987853 12237915 12559810 12632900 12911304
## LS9 LS15 GH4 BTM1515
## 13903753 14521553 14922282 15939544
sort(colSums(ko.mgm1))
## HS1433 HS1531 BD1512 HS2325 DHS0911 BD1203 CB0122 XS2022
## 516612.0 554923.2 561838.9 564412.5 570647.8 572166.7 578939.1 581887.7
## BD1623 TT1419 GH4 XS0809 CB2009 TT0302 DHS0824 GT0206
## 585609.5 588950.4 590928.8 591823.2 595951.2 597472.6 597523.9 605316.2
## GT0910 DHS0808 XS0601 TT1201 LS9 LS15 NG8 GT2515
## 607017.0 611526.6 612949.0 618809.8 618974.8 621618.7 623901.4 624726.0
## NG14 NG7 BTM1323 BTM1515 LS11 BTM2317 CB2424 GH2
## 626956.5 630290.3 640564.2 640670.3 653233.2 657988.6 661383.3 664885.4
## GH1 DL0906 DL0810 DL1904
## 670864.8 682216.2 709302.7 712869.3
sort(colSums(cog.mgm1))
## HS1433 BD1512 CB0122 DHS0911 HS1531 BD1623 HS2325 GH4
## 665137.6 700954.6 701771.9 706113.6 707706.2 717680.3 719426.8 719907.9
## BD1203 TT1419 XS2022 CB2009 XS0809 NG8 TT0302 LS9
## 720810.2 735415.6 736115.9 736728.3 737294.4 738141.8 741127.0 741129.8
## DHS0824 NG14 GT0206 NG7 LS15 GT0910 DHS0808 XS0601
## 742211.5 745558.0 745871.5 746178.6 747159.8 748678.5 749599.8 753834.3
## BTM2317 BTM1323 BTM1515 TT1201 LS11 GT2515 CB2424 DL0906
## 761668.5 762288.9 766129.1 769869.2 772483.2 780307.3 785374.0 785610.6
## GH2 GH1 DL1904 DL0810
## 806012.5 810165.7 822934.2 825360.4
sort(colSums(rgi.mgm1))
## GH4 CB0122 CB2009 NG8 BD1623 NG7 NG14 DL0906
## 20995.21 23022.58 24293.34 24840.47 25081.28 25099.30 25294.91 25324.26
## BD1512 LS15 HS1433 LS9 TT1419 LS11 BD1203 BTM2317
## 25426.07 25842.94 25856.23 25965.01 26532.75 26701.09 26808.88 27092.64
## DHS0911 TT0302 XS0601 XS0809 CB2424 XS2022 GT0206 BTM1515
## 27105.60 27138.77 27223.78 27593.08 27629.52 27753.26 27775.72 27778.20
## GT0910 TT1201 GH2 DL0810 DL1904 HS2325 DHS0824 DHS0808
## 28043.34 28054.02 28314.02 28403.45 28474.85 28496.89 28701.60 28718.88
## GH1 HS1531 BTM1323 GT2515
## 28730.37 29012.93 29564.26 30208.16
sort(colSums(cazy.mgm1))
## BD1623 GH4 BD1512 NG8 NG7 NG14 CB0122 XS0601
## 13535.55 13791.28 14072.59 14476.19 14543.41 14716.72 14718.56 14858.62
## CB2009 BD1203 GT0910 GT0206 DL0906 CB2424 LS9 LS15
## 15158.24 15372.90 15421.68 15423.15 15490.17 15512.52 15621.44 15661.32
## LS11 BTM2317 XS0809 HS1433 HS2325 XS2022 DL1904 DL0810
## 15742.96 15933.04 16022.71 16071.16 16630.05 16715.92 16996.28 17006.50
## GH2 GT2515 DHS0911 GH1 HS1531 BTM1515 TT1419 DHS0824
## 17012.91 17096.55 17106.44 17144.00 17227.44 17361.87 17600.44 17729.42
## BTM1323 TT0302 TT1201 DHS0808
## 17929.15 18253.39 18461.59 18792.74
bac.amp <- rrarefy(t(bac.amp1),sample = 24903 )
bac.mgm <- data.frame(t(bac.mgm1))
ko.mgm <- data.frame(t(ko.mgm1))
cog.mgm <- data.frame(t(cog.mgm1))
rgi.mgm <- data.frame(t(rgi.mgm1))
cazy.mgm <- data.frame(t(cazy.mgm1))
env.amp1$rgi.mgm.H <- vegan::diversity(rgi.mgm)
env.amp1$cazy.mgm.H <- vegan::diversity(cazy.mgm)
env.amp1$ko.mgm.H <- vegan::diversity(ko.mgm)
env.amp1$cog.mgm.H <- vegan::diversity(cog.mgm)
env.amp1$bac.mgm.H <- vegan::diversity(bac.mgm)
env.amp1$bac.amp.H <- vegan::diversity(bac.amp)
env.amp1$rgi.mgm.S <- vegan::specnumber(rgi.mgm)
env.amp1$cazy.mgm.S <- vegan::specnumber(cazy.mgm)
env.amp1$ko.mgm.S <- vegan::specnumber(ko.mgm)
env.amp1$cog.mgm.S <- vegan::specnumber(cog.mgm)
env.amp1$bac.mgm.S <- vegan::specnumber(bac.mgm)
env.amp1$bac.amp.S <- vegan::specnumber(bac.amp)
env.amp1$cog.mgm.sum <- rowSums(cog.mgm)
env.amp1$rgi.mgm.sum <- rowSums(rgi.mgm)
env.amp1$cazy.mgm.sum <- rowSums(cazy.mgm)
env.amp1$ko.mgm.sum <- rowSums(ko.mgm)
env.amp1$bac.mgm.sum <- rowSums(bac.mgm)
#func <- c("Ribosome","Aminoacyl.tRNA.biosynthesis")
#nam <- "Translation"
#hi <- 20
Heatmap.genes <- function (func, nam, hi){
ko.ID<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "KEGG3")
ko.ID.sub <- data.frame(ko.ID[ko.ID$Level3 %in% func,])
ko.mgm.sub <- ko.mgm1[row.names(ko.mgm1) %in% ko.ID.sub$KO,]
ko.ID.sub <- ko.ID.sub[ko.ID.sub$KO %in% row.names(ko.mgm1) ,]
ko.ID.sub <- ko.ID.sub[order(ko.ID.sub$KO),]
row.names(ko.mgm.sub) == ko.ID.sub$KO
ko.ID.sub$KO.id <- paste0(ko.ID.sub$Level3, "_",ko.ID.sub$KO)
row.names(ko.mgm.sub) <- ko.ID.sub$KO.id
ko.mgm.sub <- ko.mgm.sub[order(ko.ID.sub$No),]
env.raw1<-env.amp1
rownames(env.raw1)<-env.raw1$sample_name
env.sel<-env.raw1[row.names(env.raw1) %in% colnames(ko.mgm.sub),]
function.sel<-data.frame(t(ko.mgm.sub))
row.names(env.sel) == row.names(function.sel)
env.sel[, paste0(func, ".RA")] <- rowSums(function.sel)
env.sel[, paste0(func, ".H")] <- vegan::diversity(function.sel)
env.sel[, paste0(func, ".S")] <- vegan::specnumber(function.sel)
cog.mgm <- data.frame(t(cog.mgm1))
row.names(cog.mgm) == row.names(env.sel)
env.sel$cog.mgm.sum <- rowSums(cog.mgm)
env.cog <- data.frame(env.sel,cog.mgm )
env.cog$rib.pct <- env.cog$J/env.cog$cog.mgm.sum
env.cog$transcript.pct <- env.cog$K/env.cog$cog.mgm.sum
env.cog$defense.pct <- env.cog$V/env.cog$cog.mgm.sum
env.tmp1<-env.cog[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp1<-data.frame(apply(env.tmp1,2,as.numeric))
env.tmp2 <- function.sel
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
#cor.out$Y
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_1 <- factor(cor.out$Y_1, levels = func)
p0<-ggplot(cor.out, aes(Y_2,X)) +
facet_grid(cols = vars(Y_1), scales = "free", space = "free")+
geom_tile(aes(fill = r), size=1)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=8, face="bold",angle = 90),
axis.text.y=element_text(colour="black", size=12, face="bold"))
#print(p0)
cor.out.sig <-cor.out
cor.out.sig$r[cor.out.sig$p > 0.05] <- 0
p1<-ggplot(cor.out.sig, aes(Y_2,X)) +
facet_grid(cols = vars(Y_1), scales = "free", space = "free")+
geom_tile(aes(fill = r), size=1)+
geom_text(data= cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 1, font="bold")+scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "orange")+
theme_bw()+
theme(axis.title= element_blank(),
strip.text = element_text(size = 12,face="bold", color = "white"),
strip.background = element_rect(fill = "blue"),
panel.spacing = unit(0, "lines"),
axis.text.x=element_text(colour="black", size=8, face="bold",angle = 90,vjust = 0.5),
axis.text.y=element_text(colour="black", size=12,face="bold"))
#ggsave(paste0("pdf/Supplementary.Fig.11.Heatmap2.",nam,".2023.01.05.pdf"), width = hi, height = 6)
print(p1)
}
Heatmap.genes( c("Ribosome","Aminoacyl.tRNA.biosynthesis"), "Translation", 20 )
## num [1:3280] 0.43 0.28 0.12 -0.47 -0.41 0.33 0.28 0.52 0.52 0.71 ...

R Markdown